Spaces:
Sleeping
Sleeping
| import torch | |
| import spaces | |
| import gradio as gr | |
| from diffusers import ( | |
| DiffusionPipeline, | |
| FluxImg2ImgPipeline, | |
| StableDiffusionInstructPix2PixPipeline, | |
| StableDiffusionXLImg2ImgPipeline, | |
| ) | |
| from PIL import Image | |
# Eagerly load the three always-available pipelines at import time and place
# each one on the GPU. The portrait models are not loaded here.
print("Loading pipelines...")

# Text to image: Z-Image-Turbo in bfloat16.
pipe_t2i = DiffusionPipeline.from_pretrained(
    "Tongyi-MAI/Z-Image-Turbo",
    low_cpu_mem_usage=False,
    torch_dtype=torch.bfloat16,
).to("cuda")

# Image to image: FLUX.1-schnell in bfloat16.
pipe_flux = FluxImg2ImgPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell",
    torch_dtype=torch.bfloat16,
).to("cuda")

# Instruction-based editing: InstructPix2Pix in float16, loaded without a
# safety checker.
pipe_ip2p = StableDiffusionInstructPix2PixPipeline.from_pretrained(
    "timbrooks/instruct-pix2pix",
    safety_checker=None,
    torch_dtype=torch.float16,
).to("cuda")

print("Pipelines loaded! Portrait models load on first use.")
# Portrait models are lazy loaded on first selection to save memory.
# `portrait_pipes` caches the pipelines that have already been loaded, keyed
# by the display name used in PORTRAIT_MODELS.
portrait_pipes = {}

# Display name -> loading options for each portrait model.
#   repo:    Hugging Face repository id to load the weights from
#   variant: weight variant to request from the repo, or None for the default
#   sd15:    present (True) only on the Stable Diffusion 1.5 based entry
PORTRAIT_MODELS = {
    "SDXL Base (Balanced quality & control)": dict(
        repo="stabilityai/stable-diffusion-xl-base-1.0",
        variant="fp16",
    ),
    "Juggernaut XL (Photorealistic portraits)": dict(
        repo="RunDiffusion/Juggernaut-XL-v9",
        variant=None,
    ),
    "DreamShaper XL (Creative portraits)": dict(
        repo="Lykon/dreamshaper-xl-v2-turbo",
        variant=None,
    ),
    "Realistic Vision XL (Portrait photography)": dict(
        repo="SG161222/RealVisXL_V4.0",
        variant="fp16",
    ),
    "CyberRealistic XL (Detailed skin & faces)": dict(
        repo="stablediffusionapi/cyberrealistic-xl",
        variant=None,
    ),
    "DreamShaper (SD1.5, fast portraits)": dict(
        repo="Lykon/DreamShaper",
        variant=None,
        sd15=True,
    ),
}
def get_portrait_pipe(model_name):
    """Return the img2img pipeline for ``model_name``, loading it on first use.

    Loaded pipelines are cached in ``portrait_pipes`` so each model is only
    loaded and moved to the GPU once per process.

    Args:
        model_name: A key of ``PORTRAIT_MODELS``.

    Returns:
        The loaded pipeline, already placed on ``"cuda"``.

    Raises:
        KeyError: If ``model_name`` is not a key of ``PORTRAIT_MODELS``.
    """
    if model_name not in portrait_pipes:
        print(f"Loading {model_name} for the first time...")
        cfg = PORTRAIT_MODELS[model_name]

        kwargs = {"torch_dtype": torch.float16, "use_safetensors": True}
        if cfg.get("variant"):
            kwargs["variant"] = cfg["variant"]

        if cfg.get("sd15"):
            # Entries flagged "sd15" are Stable Diffusion 1.5 checkpoints and
            # must not be loaded with the SDXL pipeline class. Imported here
            # so the file-level import list does not need to change.
            from diffusers import StableDiffusionImg2ImgPipeline

            pipeline_cls = StableDiffusionImg2ImgPipeline
        else:
            pipeline_cls = StableDiffusionXLImg2ImgPipeline

        pipe = pipeline_cls.from_pretrained(cfg["repo"], **kwargs)
        pipe.to("cuda")
        # Cache only after a successful load so a failed attempt is retried.
        portrait_pipes[model_name] = pipe
        print(f"{model_name} loaded!")
    return portrait_pipes[model_name]
def resize_image(image, max_size=1024, multiple=64):
    """Resize ``image`` so both sides are multiples of ``multiple``.

    Images whose longer side exceeds ``max_size`` are scaled down so that it
    fits; smaller images are never scaled up, only snapped to the nearest
    multiple. Each side is clamped to at least one ``multiple`` so that very
    narrow or very small inputs cannot produce a zero-sized dimension.

    Args:
        image: Object exposing ``size`` as ``(width, height)`` and a
            ``resize((width, height))`` method (a PIL image in this app).
        max_size: Upper bound for the longer side before snapping.
        multiple: Granularity both output sides are rounded to.

    Returns:
        Whatever ``image.resize`` returns for the computed ``(width, height)``.
    """
    orig_w, orig_h = image.size
    # Capping the factor at 1.0 means small images are only snapped, not
    # enlarged.
    scale = min(1.0, max_size / orig_w, max_size / orig_h)

    def _snap(length):
        # Round to the nearest multiple, but never below one multiple:
        # round() can yield 0 for sides shorter than half a multiple.
        return max(multiple, round(length * scale / multiple) * multiple)

    return image.resize((_snap(orig_w), _snap(orig_h)))
# On ZeroGPU hardware the handler must be wrapped with spaces.GPU to be given
# a GPU for the call; the decorator is documented as having no effect on other
# hardware.
@spaces.GPU
def generate_t2i(prompt, height, width, num_inference_steps, seed, randomize_seed, progress=gr.Progress(track_tqdm=True)):
    """Generate one image from ``prompt`` with the text-to-image pipeline.

    Args:
        prompt: Text description of the image.
        height: Output height in pixels (converted with ``int``).
        width: Output width in pixels (converted with ``int``).
        num_inference_steps: Number of denoising steps (converted with ``int``).
        seed: Seed to use when ``randomize_seed`` is false. May be ``None``
            when the Seed field was cleared, in which case a random seed is
            drawn instead of failing.
        randomize_seed: When true, ignore ``seed`` and draw a random one.
        progress: Gradio progress tracker that mirrors tqdm progress.

    Returns:
        ``(image, seed)``: the first generated image and the integer seed
        that was actually used.
    """
    if randomize_seed or seed is None:
        seed = torch.randint(0, 2**32 - 1, (1,)).item()
    seed = int(seed)
    generator = torch.Generator("cuda").manual_seed(seed)

    result = pipe_t2i(
        prompt=prompt,
        height=int(height),
        width=int(width),
        num_inference_steps=int(num_inference_steps),
        guidance_scale=0.0,
        generator=generator,
    )
    return result.images[0], seed
# On ZeroGPU hardware the handler must be wrapped with spaces.GPU to be given
# a GPU for the call; the decorator is documented as having no effect on other
# hardware.
@spaces.GPU
def generate_i2i(model_choice, input_image, prompt, strength, num_inference_steps, seed, randomize_seed, progress=gr.Progress(track_tqdm=True)):
    """Edit ``input_image`` according to ``prompt`` with the selected model.

    Args:
        model_choice: Display name of the model: the FLUX entry, the
            InstructPix2Pix entry, or a key of ``PORTRAIT_MODELS``.
        input_image: Image to edit; it is passed through ``resize_image``
            before inference.
        prompt: Edit instruction or target description.
        strength: Amount of change. Passed to FLUX and the portrait models;
            not passed to InstructPix2Pix.
        num_inference_steps: Number of denoising steps (converted with ``int``).
        seed: Seed to use when ``randomize_seed`` is false. May be ``None``
            when the Seed field was cleared, in which case a random seed is
            drawn instead of failing.
        randomize_seed: When true, ignore ``seed`` and draw a random one.
        progress: Gradio progress tracker that mirrors tqdm progress.

    Returns:
        ``(image, seed)``: the edited image and the integer seed that was
        actually used.

    Raises:
        gr.Error: If no image was provided or ``model_choice`` is unknown.
    """
    flux_label = "FLUX.1-schnell (Creative, high change)"
    ip2p_label = "InstructPix2Pix (Precise, preserves identity)"

    if input_image is None:
        raise gr.Error("Please upload an image first.")
    # Reject unknown names up front: the portrait loader would otherwise fail
    # with a bare KeyError for anything that is not a configured model.
    if model_choice not in (flux_label, ip2p_label) and model_choice not in PORTRAIT_MODELS:
        raise gr.Error(f"Unknown model: {model_choice}")

    if randomize_seed or seed is None:
        seed = torch.randint(0, 2**32 - 1, (1,)).item()
    seed = int(seed)
    generator = torch.Generator("cuda").manual_seed(seed)

    input_image = resize_image(input_image)
    steps = int(num_inference_steps)

    if model_choice == flux_label:
        image = pipe_flux(
            prompt=prompt,
            image=input_image,
            strength=float(strength),
            num_inference_steps=steps,
            generator=generator,
            width=input_image.width,
            height=input_image.height,
        ).images[0]
    elif model_choice == ip2p_label:
        # InstructPix2Pix takes no strength; fixed guidance values are used.
        image = pipe_ip2p(
            prompt=prompt,
            image=input_image,
            num_inference_steps=steps,
            image_guidance_scale=1.5,
            guidance_scale=7.5,
            generator=generator,
        ).images[0]
    else:
        # Portrait models (lazy loaded)
        pipe = get_portrait_pipe(model_choice)
        image = pipe(
            prompt=prompt,
            image=input_image,
            strength=float(strength),
            num_inference_steps=steps,
            generator=generator,
        ).images[0]
    return image, seed
# Sample prompts offered in the text-to-image tab. Each prompt is wrapped in
# its own single-item list, giving one row per example with one value for the
# single prompt input.
examples_t2i = [
    [sample_prompt]
    for sample_prompt in (
        "Young Chinese woman in red Hanfu, intricate embroidery, elaborate high bun, golden phoenix headdress",
        "A majestic dragon soaring through clouds at sunset, scales shimmering with iridescent colors",
        "Cozy coffee shop interior, warm lighting, rain on windows, plants on shelves, photorealistic",
        "Astronaut riding a horse on Mars, cinematic lighting, sci-fi concept art, highly detailed",
        "Portrait of a wise old wizard with a long white beard, holding a glowing crystal staff",
    )
]
# App theme: the Soft base theme with yellow/amber accents on a slate neutral,
# the Inter font, and large text and corner radii.
custom_theme = gr.themes.Soft(
    font=gr.themes.GoogleFont("Inter"),
    primary_hue="yellow",
    secondary_hue="amber",
    neutral_hue="slate",
    text_size="lg",
    spacing_size="md",
    radius_size="lg",
)
# Override the primary button fill (normal and hover) and the block title
# weight on top of the base theme.
custom_theme = custom_theme.set(
    block_title_text_weight="600",
    button_primary_background_fill="*primary_500",
    button_primary_background_fill_hover="*primary_600",
)
# Choices for the image-to-image model selector: the two eagerly loaded
# models first, followed by every portrait model in its configured order.
I2I_MODELS = [
    "FLUX.1-schnell (Creative, high change)",
    "InstructPix2Pix (Precise, preserves identity)",
    *PORTRAIT_MODELS,
]
# Build the two-tab UI. User-facing labels use real emoji / bullet characters
# (the previous text was mis-decoded UTF-8 and rendered as stray Greek
# letters).
with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(
        """
# 🎨 Z-Image-Turbo
**Ultra-fast AI image generation & editing** • Text to Image + Image to Image
""",
        elem_classes="header-text"
    )

    with gr.Tabs():
        # ── Tab 1: Text to Image ──────────────────────────────────
        with gr.Tab("✨ Text to Image"):
            with gr.Row(equal_height=False):
                # Left column: prompt, settings, action button, examples.
                with gr.Column(scale=1, min_width=320):
                    t2i_prompt = gr.Textbox(
                        label="✨ Your Prompt",
                        placeholder="Describe the image you want to create...",
                        lines=5, max_lines=10, autofocus=True,
                    )
                    with gr.Accordion("⚙️ Advanced Settings", open=False):
                        with gr.Row():
                            t2i_height = gr.Slider(512, 2048, value=1024, step=64, label="Height")
                            t2i_width = gr.Slider(512, 2048, value=1024, step=64, label="Width")
                        t2i_steps = gr.Slider(1, 20, value=9, step=1, label="Inference Steps")
                        with gr.Row():
                            t2i_randomize = gr.Checkbox(label="🎲 Random Seed", value=True)
                            t2i_seed = gr.Number(label="Seed", value=42, precision=0, visible=False)
                        # Show the manual seed field only while random
                        # seeding is switched off.
                        t2i_randomize.change(
                            lambda r: gr.Number(visible=not r),
                            inputs=[t2i_randomize], outputs=[t2i_seed]
                        )
                    t2i_btn = gr.Button("🚀 Generate Image", variant="primary", size="lg")
                    gr.Examples(examples=examples_t2i, inputs=[t2i_prompt], label="💡 Try these prompts")
                # Right column: result image and the seed that produced it.
                with gr.Column(scale=1, min_width=320):
                    t2i_output = gr.Image(label="Generated Image", type="pil", format="png", show_label=False, height=600)
                    t2i_used_seed = gr.Number(label="🎲 Seed Used", interactive=False)

            # Both the button and submitting the prompt box run generation.
            t2i_btn.click(generate_t2i, [t2i_prompt, t2i_height, t2i_width, t2i_steps, t2i_seed, t2i_randomize], [t2i_output, t2i_used_seed])
            t2i_prompt.submit(generate_t2i, [t2i_prompt, t2i_height, t2i_width, t2i_steps, t2i_seed, t2i_randomize], [t2i_output, t2i_used_seed])

        # ── Tab 2: Image to Image ─────────────────────────────────
        with gr.Tab("🖼️ Image to Image"):
            with gr.Row(equal_height=False):
                # Left column: model selector, source image, instruction,
                # settings, action button.
                with gr.Column(scale=1, min_width=320):
                    model_choice = gr.Radio(
                        choices=I2I_MODELS,
                        value="InstructPix2Pix (Precise, preserves identity)",
                        label="🤖 Model",
                        info="InstructPix2Pix: best for targeted edits. Portrait models: high quality people & faces. FLUX: creative transformations. Note: portrait models load on first use."
                    )
                    i2i_input = gr.Image(label="Upload Image", type="pil")
                    i2i_prompt = gr.Textbox(
                        label="✨ Edit Instruction",
                        placeholder="e.g. 'woman in blue dress, photorealistic portrait' or 'make it a sunset'",
                        lines=4,
                    )
                    with gr.Accordion("⚙️ Advanced Settings", open=False):
                        i2i_strength = gr.Slider(0.1, 1.0, value=0.65, step=0.05,
                                                 label="Strength (all except InstructPix2Pix)",
                                                 info="Lower = more faithful to original. Higher = more creative change.")
                        i2i_steps = gr.Slider(1, 50, value=25, step=1, label="Inference Steps")
                        with gr.Row():
                            i2i_randomize = gr.Checkbox(label="🎲 Random Seed", value=True)
                            i2i_seed = gr.Number(label="Seed", value=42, precision=0, visible=False)
                        # Show the manual seed field only while random
                        # seeding is switched off.
                        i2i_randomize.change(
                            lambda r: gr.Number(visible=not r),
                            inputs=[i2i_randomize], outputs=[i2i_seed]
                        )
                    i2i_btn = gr.Button("🚀 Edit Image", variant="primary", size="lg")
                # Right column: edited image and the seed that produced it.
                with gr.Column(scale=1, min_width=320):
                    i2i_output = gr.Image(label="Result", type="pil", format="png", show_label=False, height=600)
                    i2i_used_seed = gr.Number(label="🎲 Seed Used", interactive=False)

            i2i_btn.click(
                generate_i2i,
                [model_choice, i2i_input, i2i_prompt, i2i_strength, i2i_steps, i2i_seed, i2i_randomize],
                [i2i_output, i2i_used_seed]
            )

    # Footer listing the models behind each tab.
    gr.Markdown(
        """
---
<div style="text-align: center; opacity: 0.7; font-size: 0.9em;">
<strong>T2I:</strong> Tongyi-MAI/Z-Image-Turbo •
<strong>I2I:</strong> FLUX.1-schnell + InstructPix2Pix + Juggernaut XL + DreamShaper XL + RealVisXL + CyberRealistic XL
</div>
"""
    )
if __name__ == "__main__":
    # Stylesheet for the page: gradient-filled header title, muted subtitle,
    # a centred container capped at 1400px, and a small lift on button hover.
    _css = """
.header-text h1 {
font-size: 2.5rem !important;
font-weight: 700 !important;
background: linear-gradient(135deg, #fbbf24 0%, #f59e0b 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
background-clip: text;
}
.header-text p { font-size: 1.1rem !important; color: #64748b !important; }
.gradio-container { max-width: 1400px !important; margin: 0 auto !important; }
button { transition: all 0.2s ease !important; }
button:hover { transform: translateY(-1px); box-shadow: 0 4px 12px rgba(0,0,0,0.15) !important; }
"""
    # Start the app with the custom theme and stylesheet, and with the MCP
    # server option switched on.
    demo.launch(
        theme=custom_theme,
        css=_css,
        mcp_server=True,
    )