"""Gradio app for text-to-image generation and image-to-image editing.

Text-to-image uses Z-Image-Turbo. Image-to-image offers FLUX.1-schnell,
InstructPix2Pix, and several portrait models that are loaded on first use.
"""
import torch
import spaces
import gradio as gr
from diffusers import (
    DiffusionPipeline,
    FluxImg2ImgPipeline,
    StableDiffusionInstructPix2PixPipeline,
    StableDiffusionXLImg2ImgPipeline,
)
from PIL import Image

print("Loading pipelines...")

# Text-to-image: Z-Image-Turbo in bfloat16, moved to the GPU right after loading.
pipe_t2i = DiffusionPipeline.from_pretrained(
    "Tongyi-MAI/Z-Image-Turbo", torch_dtype=torch.bfloat16, low_cpu_mem_usage=False
)
pipe_t2i.to("cuda")

# Image-to-image, option 1: FLUX.1-schnell in bfloat16.
pipe_flux = FluxImg2ImgPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16
)
pipe_flux.to("cuda")

# Image-to-image, option 2: InstructPix2Pix in float16, loaded without a safety checker.
pipe_ip2p = StableDiffusionInstructPix2PixPipeline.from_pretrained(
    "timbrooks/instruct-pix2pix", torch_dtype=torch.float16, safety_checker=None
)
pipe_ip2p.to("cuda")

print("Pipelines loaded! Portrait models load on first use.")

# Cache of already-loaded portrait pipelines, keyed by model display name.
# It starts empty: portrait models are loaded lazily on first selection to
# save memory.
portrait_pipes = {}

# Display name -> loading configuration:
#   repo    - Hugging Face repository id
#   variant - weight variant to request, or None for the repository default
#   sd15    - present (True) only for the Stable Diffusion 1.5 based entry
PORTRAIT_MODELS = {
    "SDXL Base (Balanced quality & control)": dict(
        repo="stabilityai/stable-diffusion-xl-base-1.0", variant="fp16"
    ),
    "Juggernaut XL (Photorealistic portraits)": dict(
        repo="RunDiffusion/Juggernaut-XL-v9", variant=None
    ),
    "DreamShaper XL (Creative portraits)": dict(
        repo="Lykon/dreamshaper-xl-v2-turbo", variant=None
    ),
    "Realistic Vision XL (Portrait photography)": dict(
        repo="SG161222/RealVisXL_V4.0", variant="fp16"
    ),
    "CyberRealistic XL (Detailed skin & faces)": dict(
        repo="stablediffusionapi/cyberrealistic-xl", variant=None
    ),
    "DreamShaper (SD1.5, fast portraits)": dict(
        repo="Lykon/DreamShaper", variant=None, sd15=True
    ),
}

def get_portrait_pipe(model_name):
    """Return the img2img pipeline for ``model_name``, loading it on first use.

    Loaded pipelines are cached in the module-level ``portrait_pipes`` dict,
    so each model is loaded and moved to the GPU at most once per process.

    Args:
        model_name: A key of ``PORTRAIT_MODELS``.

    Returns:
        The cached pipeline object for that model.

    Raises:
        KeyError: If ``model_name`` is not a key of ``PORTRAIT_MODELS``.
    """
    if model_name in portrait_pipes:
        return portrait_pipes[model_name]

    print(f"Loading {model_name} for the first time...")
    cfg = PORTRAIT_MODELS[model_name]

    if cfg.get("sd15"):
        # Entries flagged "sd15" are Stable Diffusion 1.5 checkpoints. They do
        # not have the SDXL component layout, so they need the SD1.5 img2img
        # pipeline class instead of the SDXL one.
        from diffusers import StableDiffusionImg2ImgPipeline

        pipeline_cls = StableDiffusionImg2ImgPipeline
    else:
        pipeline_cls = StableDiffusionXLImg2ImgPipeline

    kwargs = dict(torch_dtype=torch.float16, use_safetensors=True)
    # Only request a weight variant when the config names one.
    if cfg.get("variant"):
        kwargs["variant"] = cfg["variant"]

    pipe = pipeline_cls.from_pretrained(cfg["repo"], **kwargs)
    pipe.to("cuda")
    portrait_pipes[model_name] = pipe
    print(f"{model_name} loaded!")
    return pipe

def resize_image(image, max_size=1024):
    """Fit ``image`` within ``max_size`` and snap both sides to multiples of 64.

    If either side exceeds ``max_size`` the image is scaled down by a single
    factor so the longer side equals ``max_size``; smaller images are not
    upscaled. Each side is then rounded to the nearest multiple of 64 and
    clamped to a minimum of 64, so very small or very elongated inputs never
    produce a zero-sized dimension.

    Args:
        image: Object exposing ``size`` as ``(width, height)`` and a
            ``resize((width, height))`` method, e.g. a PIL image.
        max_size: Upper bound applied to both sides before snapping.

    Returns:
        The result of ``image.resize`` with the computed ``(width, height)``.
    """
    orig_w, orig_h = image.size
    # Capping the factor at 1.0 means "never upscale"; at exactly 1.0 the
    # computation below reduces to plain snapping of the original size.
    scale = min(1.0, max_size / orig_w, max_size / orig_h)

    def snap(length):
        # Nearest multiple of 64, but never below 64 (round() can yield 0).
        return max(64, round(length * scale / 64) * 64)

    return image.resize((snap(orig_w), snap(orig_h)))

@spaces.GPU
def generate_t2i(prompt, height, width, num_inference_steps, seed, randomize_seed, progress=gr.Progress(track_tqdm=True)):
    """Generate one image from a text prompt with the ``pipe_t2i`` pipeline.

    Args:
        prompt: Text description of the image.
        height: Output height; converted with ``int()``.
        width: Output width; converted with ``int()``.
        num_inference_steps: Number of steps; converted with ``int()``.
        seed: Seed to use when ``randomize_seed`` is false.
        randomize_seed: When true, ``seed`` is replaced by a random integer
            drawn with ``torch.randint(0, 2**32 - 1, ...)``.
        progress: Gradio progress tracker declared with ``track_tqdm=True``;
            not referenced in the body.

    Returns:
        ``(image, seed)``: the first image of the pipeline output and the
        seed that was used.
    """
    if randomize_seed:
        seed = torch.randint(0, 2**32 - 1, (1,)).item()

    rng = torch.Generator("cuda").manual_seed(int(seed))
    call_args = dict(
        prompt=prompt,
        height=int(height),
        width=int(width),
        num_inference_steps=int(num_inference_steps),
        guidance_scale=0.0,
        generator=rng,
    )
    result = pipe_t2i(**call_args)
    return result.images[0], seed

@spaces.GPU
def generate_i2i(model_choice, input_image, prompt, strength, num_inference_steps, seed, randomize_seed, progress=gr.Progress(track_tqdm=True)):
    """Edit ``input_image`` according to ``prompt`` with the selected model.

    Args:
        model_choice: Display name of the model. The FLUX and InstructPix2Pix
            names are handled explicitly; any other value is passed to
            ``get_portrait_pipe``.
        input_image: Image to edit; must not be ``None``.
        prompt: Edit instruction or target description.
        strength: Edit strength, converted with ``float()``. Not passed to
            the InstructPix2Pix pipeline.
        num_inference_steps: Number of steps; converted with ``int()``.
        seed: Seed to use when ``randomize_seed`` is false.
        randomize_seed: When true, ``seed`` is replaced by a random integer
            drawn with ``torch.randint(0, 2**32 - 1, ...)``.
        progress: Gradio progress tracker declared with ``track_tqdm=True``;
            not referenced in the body.

    Returns:
        ``(image, seed)``: the first image of the pipeline output and the
        seed that was used.

    Raises:
        gr.Error: If no input image was provided.
    """
    if input_image is None:
        raise gr.Error("Please upload an image first.")

    if randomize_seed:
        seed = torch.randint(0, 2**32 - 1, (1,)).item()
    rng = torch.Generator("cuda").manual_seed(int(seed))

    # Every model receives the image after it has gone through resize_image.
    source = resize_image(input_image)

    if model_choice == "FLUX.1-schnell (Creative, high change)":
        flux_out = pipe_flux(
            prompt=prompt,
            image=source,
            strength=float(strength),
            num_inference_steps=int(num_inference_steps),
            generator=rng,
            width=source.width,
            height=source.height,
        )
        return flux_out.images[0], seed

    if model_choice == "InstructPix2Pix (Precise, preserves identity)":
        # No strength here: this pipeline is driven by the two guidance scales.
        ip2p_out = pipe_ip2p(
            prompt=prompt,
            image=source,
            num_inference_steps=int(num_inference_steps),
            image_guidance_scale=1.5,
            guidance_scale=7.5,
            generator=rng,
        )
        return ip2p_out.images[0], seed

    # Anything else is treated as a portrait model (lazy loaded).
    portrait_pipe = get_portrait_pipe(model_choice)
    portrait_out = portrait_pipe(
        prompt=prompt,
        image=source,
        strength=float(strength),
        num_inference_steps=int(num_inference_steps),
        generator=rng,
    )
    return portrait_out.images[0], seed

# Sample prompts for the text-to-image tab. Each example is a one-element row
# holding the value for the single prompt input.
examples_t2i = [
    [prompt_text]
    for prompt_text in (
        "Young Chinese woman in red Hanfu, intricate embroidery, elaborate high bun, golden phoenix headdress",
        "A majestic dragon soaring through clouds at sunset, scales shimmering with iridescent colors",
        "Cozy coffee shop interior, warm lighting, rain on windows, plants on shelves, photorealistic",
        "Astronaut riding a horse on Mars, cinematic lighting, sci-fi concept art, highly detailed",
        "Portrait of a wise old wizard with a long white beard, holding a glowing crystal staff",
    )
]

# Soft base theme: yellow/amber accents on a slate neutral, Inter font,
# large text and corner radius, medium spacing.
custom_theme = gr.themes.Soft(
    primary_hue="yellow", secondary_hue="amber", neutral_hue="slate",
    font=gr.themes.GoogleFont("Inter"),
    text_size="lg", spacing_size="md", radius_size="lg",
)
# Overrides on top of the base: primary button fill (normal and hover) and
# the block title weight.
custom_theme = custom_theme.set(
    button_primary_background_fill="*primary_500",
    button_primary_background_fill_hover="*primary_600",
    block_title_text_weight="600",
)

# Choices for the image-to-image model selector: the two fixed models first,
# followed by every portrait model name in PORTRAIT_MODELS order.
I2I_MODELS = [
    "FLUX.1-schnell (Creative, high change)",
    "InstructPix2Pix (Precise, preserves identity)",
    *PORTRAIT_MODELS,
]

# UI layout: a header, two tabs (text-to-image and image-to-image), and a
# footer naming the models. Labels use real Unicode characters; the previous
# text was mis-encoded UTF-8 (mojibake).
with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(
        """
        # 🎨 Z-Image-Turbo
        **Ultra-fast AI image generation & editing** • Text to Image + Image to Image
        """,
        elem_classes="header-text"
    )

    with gr.Tabs():

        # ── Tab 1: Text to Image ──────────────────────────────────
        with gr.Tab("✨ Text to Image"):
            with gr.Row(equal_height=False):
                # Left column: prompt, settings, generate button, examples.
                with gr.Column(scale=1, min_width=320):
                    t2i_prompt = gr.Textbox(
                        label="✨ Your Prompt",
                        placeholder="Describe the image you want to create...",
                        lines=5, max_lines=10, autofocus=True,
                    )
                    with gr.Accordion("⚙️ Advanced Settings", open=False):
                        with gr.Row():
                            t2i_height = gr.Slider(512, 2048, value=1024, step=64, label="Height")
                            t2i_width = gr.Slider(512, 2048, value=1024, step=64, label="Width")
                        t2i_steps = gr.Slider(1, 20, value=9, step=1, label="Inference Steps")
                        with gr.Row():
                            t2i_randomize = gr.Checkbox(label="🎲 Random Seed", value=True)
                            t2i_seed = gr.Number(label="Seed", value=42, precision=0, visible=False)
                        # Show the manual seed field only while "Random Seed" is unchecked.
                        t2i_randomize.change(
                            lambda r: gr.Number(visible=not r),
                            inputs=[t2i_randomize], outputs=[t2i_seed]
                        )
                    t2i_btn = gr.Button("🚀 Generate Image", variant="primary", size="lg")
                    gr.Examples(examples=examples_t2i, inputs=[t2i_prompt], label="💡 Try these prompts")

                # Right column: generated image and the seed that produced it.
                with gr.Column(scale=1, min_width=320):
                    t2i_output = gr.Image(label="Generated Image", type="pil", format="png", show_label=False, height=600)
                    t2i_used_seed = gr.Number(label="🎲 Seed Used", interactive=False)

            # Generation runs on button click and on submitting the prompt box.
            t2i_btn.click(generate_t2i, [t2i_prompt, t2i_height, t2i_width, t2i_steps, t2i_seed, t2i_randomize], [t2i_output, t2i_used_seed])
            t2i_prompt.submit(generate_t2i, [t2i_prompt, t2i_height, t2i_width, t2i_steps, t2i_seed, t2i_randomize], [t2i_output, t2i_used_seed])

        # ── Tab 2: Image to Image ─────────────────────────────────
        with gr.Tab("🖼️ Image to Image"):
            with gr.Row(equal_height=False):
                # Left column: model selector, source image, instruction, settings.
                with gr.Column(scale=1, min_width=320):
                    model_choice = gr.Radio(
                        choices=I2I_MODELS,
                        value="InstructPix2Pix (Precise, preserves identity)",
                        label="🤖 Model",
                        info="InstructPix2Pix: best for targeted edits. Portrait models: high quality people & faces. FLUX: creative transformations. Note: portrait models load on first use."
                    )
                    i2i_input = gr.Image(label="Upload Image", type="pil")
                    i2i_prompt = gr.Textbox(
                        label="✨ Edit Instruction",
                        placeholder="e.g. 'woman in blue dress, photorealistic portrait' or 'make it a sunset'",
                        lines=4,
                    )
                    with gr.Accordion("⚙️ Advanced Settings", open=False):
                        i2i_strength = gr.Slider(0.1, 1.0, value=0.65, step=0.05,
                            label="Strength (all except InstructPix2Pix)",
                            info="Lower = more faithful to original. Higher = more creative change.")
                        i2i_steps = gr.Slider(1, 50, value=25, step=1, label="Inference Steps")
                        with gr.Row():
                            i2i_randomize = gr.Checkbox(label="🎲 Random Seed", value=True)
                            i2i_seed = gr.Number(label="Seed", value=42, precision=0, visible=False)
                        # Show the manual seed field only while "Random Seed" is unchecked.
                        i2i_randomize.change(
                            lambda r: gr.Number(visible=not r),
                            inputs=[i2i_randomize], outputs=[i2i_seed]
                        )
                    i2i_btn = gr.Button("🚀 Edit Image", variant="primary", size="lg")

                # Right column: edited image and the seed that produced it.
                with gr.Column(scale=1, min_width=320):
                    i2i_output = gr.Image(label="Result", type="pil", format="png", show_label=False, height=600)
                    i2i_used_seed = gr.Number(label="🎲 Seed Used", interactive=False)

            i2i_btn.click(
                generate_i2i,
                [model_choice, i2i_input, i2i_prompt, i2i_strength, i2i_steps, i2i_seed, i2i_randomize],
                [i2i_output, i2i_used_seed]
            )

    # Footer crediting the models behind each tab.
    gr.Markdown(
        """
        ---
        <div style="text-align: center; opacity: 0.7; font-size: 0.9em;">
        <strong>T2I:</strong> Tongyi-MAI/Z-Image-Turbo •
        <strong>I2I:</strong> FLUX.1-schnell + InstructPix2Pix + Juggernaut XL + DreamShaper XL + RealVisXL + CyberRealistic XL
        </div>
        """
    )

if __name__ == "__main__":
    # Page styling: gradient-filled title, muted subtitle, capped page width,
    # and a small lift/shadow on button hover.
    page_css = """
        .header-text h1 {
            font-size: 2.5rem !important;
            font-weight: 700 !important;
            background: linear-gradient(135deg, #fbbf24 0%, #f59e0b 100%);
            -webkit-background-clip: text;
            -webkit-text-fill-color: transparent;
            background-clip: text;
        }
        .header-text p { font-size: 1.1rem !important; color: #64748b !important; }
        .gradio-container { max-width: 1400px !important; margin: 0 auto !important; }
        button { transition: all 0.2s ease !important; }
        button:hover { transform: translateY(-1px); box-shadow: 0 4px 12px rgba(0,0,0,0.15) !important; }
        """
    # Launch with the custom theme and CSS, and with the MCP server enabled.
    demo.launch(theme=custom_theme, css=page_css, mcp_server=True)