SEVAQWERTY commited on
Commit
24ba517
·
verified ·
1 Parent(s): 7cdf102

Upload 5 files

Browse files
Files changed (5) hide show
  1. .gitattributes +35 -35
  2. README.md +14 -14
  3. app.py +474 -0
  4. optimization.py +70 -0
  5. requirements.txt +11 -0
.gitattributes CHANGED
@@ -1,35 +1,35 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,14 +1,14 @@
1
- ---
2
- title: Test
3
- emoji: 🌍
4
- colorFrom: yellow
5
- colorTo: gray
6
- sdk: gradio
7
- sdk_version: 6.8.0
8
- app_file: app.py
9
- pinned: false
10
- license: apache-2.0
11
- short_description: test
12
- ---
13
-
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: Qwen Image Edit 2511 Fast
3
+ emoji: 🏆💨
4
+ colorFrom: pink
5
+ colorTo: red
6
+ sdk: gradio
7
+ sdk_version: 6.2.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: apache-2.0
11
+ short_description: Fast 4 step inference of Qwen Image Edit 2511
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,474 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import random
4
+ import torch
5
+ import spaces
6
+
7
+ from PIL import Image
8
+ from diffusers import FlowMatchEulerDiscreteScheduler, QwenImageEditPlusPipeline
9
+ # from optimization import optimize_pipeline_
10
+ # from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
11
+ # from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
12
+ # from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3
13
+
14
+ from huggingface_hub import InferenceClient
15
+ import math
16
+
17
+ import os
18
+ import base64
19
+ from io import BytesIO
20
+ import json
21
+
22
+ SYSTEM_PROMPT = '''
23
+ # Edit Instruction Rewriter
24
+ You are a professional edit instruction rewriter. Your task is to generate a precise, concise, and visually achievable professional-level edit instruction based on the user-provided instruction and the image to be edited.
25
+
26
+ Please strictly follow the rewriting rules below:
27
+
28
+ ## 1. General Principles
29
+ - Keep the rewritten prompt **concise and comprehensive**. Avoid overly long sentences and unnecessary descriptive language.
30
+ - If the instruction is contradictory, vague, or unachievable, prioritize reasonable inference and correction, and supplement details when necessary.
31
+ - Keep the main part of the original instruction unchanged, only enhancing its clarity, rationality, and visual feasibility.
32
+ - All added objects or modifications must align with the logic and style of the scene in the input images.
33
+ - If multiple sub-images are to be generated, describe the content of each sub-image individually.
34
+
35
+ ## 2. Task-Type Handling Rules
36
+
37
+ ### 1. Add, Delete, Replace Tasks
38
+ - If the instruction is clear (already includes task type, target entity, position, quantity, attributes), preserve the original intent and only refine the grammar.
39
+ - If the description is vague, supplement with minimal but sufficient details (category, color, size, orientation, position, etc.). For example:
40
+ > Original: "Add an animal"
41
+ > Rewritten: "Add a light-gray cat in the bottom-right corner, sitting and facing the camera"
42
+ - Remove meaningless instructions: e.g., "Add 0 objects" should be ignored or flagged as invalid.
43
+ - For replacement tasks, specify "Replace Y with X" and briefly describe the key visual features of X.
44
+
45
+ ### 2. Text Editing Tasks
46
+ - All text content must be enclosed in English double quotes `" "`. Keep the original language of the text, and keep the capitalization.
47
+ - Both adding new text and replacing existing text are text replacement tasks, For example:
48
+ - Replace "xx" to "yy"
49
+ - Replace the mask / bounding box to "yy"
50
+ - Replace the visual object to "yy"
51
+ - Specify text position, color, and layout only if user has required.
52
+ - If font is specified, keep the original language of the font.
53
+
54
+ ### 3. Human Editing Tasks
55
+ - Make the smallest changes to the given user's prompt.
56
+ - If changes to background, action, expression, camera shot, or ambient lighting are required, please list each modification individually.
57
+ - **Edits to makeup or facial features / expression must be subtle, not exaggerated, and must preserve the subject's identity consistency.**
58
+ > Original: "Add eyebrows to the face"
59
+ > Rewritten: "Slightly thicken the person's eyebrows with little change, look natural."
60
+
61
+ ### 4. Style Conversion or Enhancement Tasks
62
+ - If a style is specified, describe it concisely using key visual features. For example:
63
+ > Original: "Disco style"
64
+ > Rewritten: "1970s disco style: flashing lights, disco ball, mirrored walls, vibrant colors"
65
+ - For style reference, analyze the original image and extract key characteristics (color, composition, texture, lighting, artistic style, etc.), integrating them into the instruction.
66
+ - **Colorization tasks (including old photo restoration) must use the fixed template:**
67
+ "Restore and colorize the old photo."
68
+ - Clearly specify the object to be modified. For example:
69
+ > Original: Modify the subject in Picture 1 to match the style of Picture 2.
70
+ > Rewritten: Change the girl in Picture 1 to the ink-wash style of Picture 2 — rendered in black-and-white watercolor with soft color transitions.
71
+
72
+ ### 5. Material Replacement
73
+ - Clearly specify the object and the material. For example: "Change the material of the apple to papercut style."
74
+ - For text material replacement, use the fixed template:
75
+ "Change the material of text "xxxx" to laser style"
76
+
77
+ ### 6. Logo/Pattern Editing
78
+ - Material replacement should preserve the original shape and structure as much as possible. For example:
79
+ > Original: "Convert to sapphire material"
80
+ > Rewritten: "Convert the main subject in the image to sapphire material, preserving similar shape and structure"
81
+ - When migrating logos/patterns to new scenes, ensure shape and structure consistency. For example:
82
+ > Original: "Migrate the logo in the image to a new scene"
83
+ > Rewritten: "Migrate the logo in the image to a new scene, preserving similar shape and structure"
84
+
85
+ ### 7. Multi-Image Tasks
86
+ - Rewritten prompts must clearly point out which image's element is being modified. For example:
87
+ > Original: "Replace the subject of picture 1 with the subject of picture 2"
88
+ > Rewritten: "Replace the girl of picture 1 with the boy of picture 2, keeping picture 2's background unchanged"
89
+ - For stylization tasks, describe the reference image's style in the rewritten prompt, while preserving the visual content of the source image.
90
+
91
+ ## 3. Rationale and Logic Check
92
+ - Resolve contradictory instructions: e.g., "Remove all trees but keep all trees" requires logical correction.
93
+ - Supplement missing critical information: e.g., if position is unspecified, choose a reasonable area based on composition (near subject, blank space, center/edge, etc.).
94
+
95
+ # Output Format Example
96
+ ```json
97
+ {
98
+ "Rewritten": "..."
99
+ }
100
+ '''
101
+
102
def polish_prompt_hf(original_prompt, img_list):
    """
    Rewrite an edit instruction with a hosted VLM (Qwen2.5-VL via HF Inference).

    Parameters:
        original_prompt (str): The user's raw edit instruction.
        img_list (list | PIL.Image.Image | str | None): Images (PIL objects or
            file paths) sent as visual context; a single item is wrapped in a
            list automatically.

    Returns:
        str: The rewritten prompt with newlines collapsed to spaces, or the
        original prompt unchanged when the API key is missing or any step of
        the remote call fails (best-effort behavior).
    """
    # The token is stored in the `inference_providers` Space secret, not
    # HF_TOKEN — the warning names the variable actually checked.
    api_key = os.environ.get("inference_providers")
    if not api_key:
        print("Warning: 'inference_providers' env var not set. Falling back to original prompt.")
        return original_prompt

    prompt = f"{SYSTEM_PROMPT}\n\nUser Input: {original_prompt}\n\nRewritten Prompt:"
    system_prompt = "you are a helpful assistant, you should provide useful answers to users."
    try:
        client = InferenceClient(
            provider="nebius",
            api_key=api_key,
        )

        image_urls = _images_to_data_urls(img_list)

        # Chat-completions VLM format: text part first, then all images.
        content = [{"type": "text", "text": prompt}]
        for image_url in image_urls:
            content.append({
                "type": "image_url",
                "image_url": {"url": image_url},
            })

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": content},
        ]

        completion = client.chat.completions.create(
            model="Qwen/Qwen2.5-VL-72B-Instruct",
            messages=messages,
        )
        result = completion.choices[0].message.content

        polished_prompt = _extract_rewritten(result)
        return polished_prompt.strip().replace("\n", " ")

    except Exception as e:
        print(f"Error during API call to Hugging Face: {e}")
        # Fallback to original prompt if enhancement fails
        return original_prompt


def _images_to_data_urls(img_list):
    """Convert PIL images and/or file paths into base64 PNG data URLs."""
    if img_list is None:
        return []
    if not isinstance(img_list, list):
        img_list = [img_list]

    urls = []
    for img in img_list:
        if hasattr(img, 'save'):  # duck-typed check for a PIL Image
            buffered = BytesIO()
            img.save(buffered, format="PNG")
            raw = buffered.getvalue()
        elif isinstance(img, str):  # treat strings as file paths
            with open(img, "rb") as image_file:
                raw = image_file.read()
        else:
            print(f"Warning: Unexpected image type: {type(img)}, skipping...")
            continue
        urls.append("data:image/png;base64," + base64.b64encode(raw).decode('utf-8'))
    return urls


def _extract_rewritten(result):
    """Pull the 'Rewritten' field out of a (possibly ```json-fenced) reply.

    Returns the raw reply unchanged when it does not look like the expected
    JSON or fails to parse — never raises.
    """
    if '"Rewritten"' not in result:
        return result
    try:
        # Strip markdown code fences before parsing.
        cleaned = result.replace('```json', '').replace('```', '')
        return json.loads(cleaned).get('Rewritten', result)
    except (json.JSONDecodeError, TypeError, ValueError):
        # Narrowed from a bare `except:` — only parse failures fall back.
        return result
202
+
203
+
204
+
205
def encode_image(pil_image):
    """Return *pil_image* serialized as PNG and base64-encoded (utf-8 str)."""
    import io
    buf = io.BytesIO()
    pil_image.save(buf, format="PNG")
    raw_png = buf.getvalue()
    return base64.b64encode(raw_png).decode("utf-8")
210
+
211
# --- Model Loading ---
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

# Scheduler configuration for Lightning.
# base_shift == max_shift == log(3) with exponential dynamic shifting, so the
# time-shift is effectively constant regardless of image sequence length —
# presumably matching how the 4-step Lightning LoRA was distilled (TODO confirm
# against the lightx2v model card).
scheduler_config = {
    "base_image_seq_len": 256,
    "base_shift": math.log(3),
    "invert_sigmas": False,
    "max_image_seq_len": 8192,
    "max_shift": math.log(3),
    "num_train_timesteps": 1000,
    "shift": 1.0,
    "shift_terminal": None,
    "stochastic_sampling": False,
    "time_shift_type": "exponential",
    "use_beta_sigmas": False,
    "use_dynamic_shifting": True,
    "use_exponential_sigmas": False,
    "use_karras_sigmas": False,
}

# Initialize scheduler with Lightning config
scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)

# Load the model pipeline (module-level: runs once at Space startup).
pipe = QwenImageEditPlusPipeline.from_pretrained("Qwen/Qwen-Image-Edit-2511",
                                                 scheduler=scheduler,
                                                 torch_dtype=dtype).to(device)
# 4-step distilled LoRA; fused into the base weights so each denoising step
# runs without per-step LoRA dispatch overhead.
pipe.load_lora_weights(
    "lightx2v/Qwen-Image-Edit-2511-Lightning",
    weight_name="Qwen-Image-Edit-2511-Lightning-4steps-V1.0-bf16.safetensors"
)
pipe.fuse_lora()

# # Apply the same optimizations from the first version
# pipe.transformer.__class__ = QwenImageTransformer2DModel
# pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())

# # --- Ahead-of-time compilation ---
# optimize_pipeline_(pipe, image=[Image.new("RGB", (1024, 1024)), Image.new("RGB", (1024, 1024))], prompt="prompt")

# --- UI Constants and Helpers ---
# Upper bound for the seed slider / random seed draw.
MAX_SEED = np.iinfo(np.int32).max
256
def use_output_as_input(output_images):
    """Feed the result gallery back into the input gallery.

    Returns an empty list when there is no output to reuse, otherwise the
    images unchanged.
    """
    no_output = output_images is None or len(output_images) == 0
    return [] if no_output else output_images
261
+
262
+ # --- Main Inference Function (with hardcoded negative prompt) ---
263
@spaces.GPU()
def infer(
    images,
    prompt,
    seed=42,
    randomize_seed=False,
    true_guidance_scale=1.0,
    num_inference_steps=4,
    height=None,
    width=None,
    rewrite_prompt=True,
    num_images_per_prompt=1,
    progress=gr.Progress(track_tqdm=True),
):
    """
    Run image-editing inference using the Qwen-Image-Edit pipeline.

    Parameters:
        images (list): Input images from the Gradio gallery (PIL or path-based).
        prompt (str): Editing instruction (may be rewritten by LLM if enabled).
        seed (int): Random seed for reproducibility.
        randomize_seed (bool): If True, overrides seed with a random value.
        true_guidance_scale (float): CFG scale used by Qwen-Image.
        num_inference_steps (int): Number of diffusion steps.
        height (int | None): Optional output height override.
        width (int | None): Optional output width override.
        rewrite_prompt (bool): Whether to rewrite the prompt using Qwen-2.5-VL.
        num_images_per_prompt (int): Number of images to generate.
        progress: Gradio progress callback.

    Returns:
        tuple: (generated_images, seed_used, UI_visibility_update)
    """

    # Hardcode the negative prompt as requested
    negative_prompt = " "

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    # Set up the generator for reproducibility
    generator = torch.Generator(device=device).manual_seed(seed)

    # Load input images into PIL Images.
    # Gallery items usually arrive as (image, caption) tuples; plain PIL
    # images, path strings, and file-like objects with a .name are tolerated.
    # BUG FIX: the original indexed item[0] first, so a non-subscriptable
    # file-like item raised inside the try and the hasattr(item, "name")
    # fallback was unreachable dead code. Unwrap tuples/lists explicitly.
    pil_images = []
    if images is not None:
        for item in images:
            try:
                candidate = item[0] if isinstance(item, (tuple, list)) else item
                if isinstance(candidate, Image.Image):
                    pil_images.append(candidate.convert("RGB"))
                elif isinstance(candidate, str):
                    pil_images.append(Image.open(candidate).convert("RGB"))
                elif hasattr(candidate, "name"):
                    pil_images.append(Image.open(candidate.name).convert("RGB"))
            except Exception:
                # Best-effort loading: skip anything that cannot be read.
                continue

    # 256 is the sliders' minimum and doubles as the "unspecified" sentinel:
    # 256x256 means "let the pipeline choose the output size".
    if height == 256 and width == 256:
        height, width = None, None
    print(f"Calling pipeline with prompt: '{prompt}'")
    print(f"Negative Prompt: '{negative_prompt}'")
    print(f"Seed: {seed}, Steps: {num_inference_steps}, Guidance: {true_guidance_scale}, Size: {width}x{height}")
    if rewrite_prompt and len(pil_images) > 0:
        prompt = polish_prompt_hf(prompt, pil_images)
        print(f"Rewritten Prompt: {prompt}")

    # Generate the image
    image = pipe(
        image=pil_images if len(pil_images) > 0 else None,
        prompt=prompt,
        height=height,
        width=width,
        negative_prompt=negative_prompt,
        num_inference_steps=num_inference_steps,
        generator=generator,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=num_images_per_prompt,
    ).images

    # Return images, seed, and make the "use as input" button visible
    return image, seed, gr.update(visible=True)
345
+
346
# --- Examples and UI Layout ---
examples = []

css = """
#col-container {
    margin: 0 auto;
    max-width: 1024px;
}
#logo-title {
    text-align: center;
}
#logo-title img {
    width: 400px;
}
#edit_text{margin-top: -62px !important}
"""

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML("""
        <div id="logo-title">
            <img src="https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Image/qwen_image_edit_logo.png" alt="Qwen-Image Edit Logo" width="400" style="display: block; margin: 0 auto;">
            <h2 style="font-style: italic;color: #5b47d1;margin-top: -27px !important;margin-left: 96px">[Plus] Fast, 4-steps with LightX2V LoRA</h2>
        </div>
        """)
        # NOTE(review): the "download model" link below points at
        # Qwen-Image-Edit-2509 while this demo loads 2511 — confirm intended.
        gr.Markdown("""
        [Learn more](https://github.com/QwenLM/Qwen-Image) about the Qwen-Image series.
        This demo uses the new [Qwen-Image-Edit-2511](https://huggingface.co/Qwen/Qwen-Image-Edit-2511) with the [Qwen-Image-Lightning-2511](https://huggingface.co/lightx2v/Qwen-Image-Edit-2511-Lightning) LoRA for accelerated inference.
        Try on [Qwen Chat](https://chat.qwen.ai/), or [download model](https://huggingface.co/Qwen/Qwen-Image-Edit-2509) to run locally with ComfyUI or diffusers.
        """)
        with gr.Row():
            with gr.Column():
                input_images = gr.Gallery(label="Input Images",
                                          show_label=False,
                                          type="pil",
                                          interactive=True)

            with gr.Column():
                result = gr.Gallery(label="Result", show_label=False, type="pil", interactive=False)
                # Add this button right after the result gallery - initially hidden;
                # infer() returns gr.update(visible=True) to reveal it.
                use_output_btn = gr.Button("↗️ Use as input", variant="secondary", size="sm", visible=False)

        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                placeholder="describe the edit instruction",
                container=False,
            )
            run_button = gr.Button("Edit!", variant="primary")

        with gr.Accordion("Advanced Settings", open=False):
            # Negative prompt UI element is removed here (hardcoded in infer)

            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )

            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

            with gr.Row():

                # Default 1.0 = effectively no CFG, matching the 4-step
                # Lightning-distilled setup.
                true_guidance_scale = gr.Slider(
                    label="True guidance scale",
                    minimum=1.0,
                    maximum=10.0,
                    step=0.1,
                    value=1.0
                )

                num_inference_steps = gr.Slider(
                    label="Number of inference steps",
                    minimum=1,
                    maximum=40,
                    step=1,
                    value=4,
                )

            # value=None makes the sliders start at their minimum (256);
            # infer() treats 256x256 as "auto size".
            height = gr.Slider(
                label="Height",
                minimum=256,
                maximum=2048,
                step=8,
                value=None,
            )

            width = gr.Slider(
                label="Width",
                minimum=256,
                maximum=2048,
                step=8,
                value=None,
            )

            rewrite_prompt = gr.Checkbox(label="Rewrite prompt", value=True)

        # gr.Examples(examples=examples, inputs=[prompt], outputs=[result, seed], fn=infer, cache_examples=False)

        # num_images_per_prompt is not wired up — infer() uses its default (1).
        gr.on(
            triggers=[run_button.click, prompt.submit],
            fn=infer,
            inputs=[
                input_images,
                prompt,
                seed,
                randomize_seed,
                true_guidance_scale,
                num_inference_steps,
                height,
                width,
                rewrite_prompt,
            ],
            outputs=[result, seed, use_output_btn],  # Added use_output_btn to outputs
        )

        # Add the new event handler for the "Use Output as Input" button
        use_output_btn.click(
            fn=use_output_as_input,
            inputs=[result],
            outputs=[input_images]
        )

if __name__ == "__main__":
    demo.launch(mcp_server=True)
optimization.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Ahead-of-time compilation helper for the pipeline's transformer on Spaces.

Captures one real transformer call, exports it with torch.export using
dynamic image/text sequence-length dims, compiles it with the inductor
configs below, and applies the compiled artifact back onto the pipeline
via the `spaces` AoT utilities.
"""

from typing import Any
from typing import Callable
from typing import ParamSpec
from torchao.quantization import quantize_
from torchao.quantization import Float8DynamicActivationFloat8WeightConfig
import spaces
import torch
from torch.utils._pytree import tree_map


P = ParamSpec('P')


# Symbolic dims so the exported graph accepts varying sequence lengths
# instead of being specialized to the traced sizes.
TRANSFORMER_IMAGE_SEQ_LENGTH_DIM = torch.export.Dim('image_seq_length')
TRANSFORMER_TEXT_SEQ_LENGTH_DIM = torch.export.Dim('text_seq_length')

# Maps transformer kwarg -> {tensor dim index: symbolic dim}. Dim 1 is
# presumably the sequence axis of (batch, seq, hidden) tensors — TODO confirm
# against the transformer's forward signature.
TRANSFORMER_DYNAMIC_SHAPES = {
    'hidden_states': {
        1: TRANSFORMER_IMAGE_SEQ_LENGTH_DIM,
    },
    'encoder_hidden_states': {
        1: TRANSFORMER_TEXT_SEQ_LENGTH_DIM,
    },
    'encoder_hidden_states_mask': {
        1: TRANSFORMER_TEXT_SEQ_LENGTH_DIM,
    },
    # Rotary embeddings come as an (image, text) pair; dim 0 tracks each
    # sequence length.
    'image_rotary_emb': ({
        0: TRANSFORMER_IMAGE_SEQ_LENGTH_DIM,
    }, {
        0: TRANSFORMER_TEXT_SEQ_LENGTH_DIM,
    }),
}


# Aggressive inductor tuning; cudagraphs cut per-step kernel-launch overhead.
INDUCTOR_CONFIGS = {
    'conv_1x1_as_mm': True,
    'epilogue_fusion': False,
    'coordinate_descent_tuning': True,
    'coordinate_descent_check_all_directions': True,
    'max_autotune': True,
    'triton.cudagraphs': True,
}


def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kwargs):
    # Compile `pipeline.transformer` in place (trailing underscore = mutates
    # its argument). *args/**kwargs must form one representative pipeline
    # call used purely for tracing.

    # Long GPU reservation: export + max-autotune compilation is slow.
    @spaces.GPU(duration=1500)
    def compile_transformer():

        # Run the pipeline once and capture the transformer call's
        # actual args/kwargs.
        with spaces.aoti_capture(pipeline.transformer) as call:
            pipeline(*args, **kwargs)

        # Mark every captured kwarg static (None), then overlay the dims
        # declared dynamic above.
        dynamic_shapes = tree_map(lambda t: None, call.kwargs)
        dynamic_shapes |= TRANSFORMER_DYNAMIC_SHAPES

        # quantize_(pipeline.transformer, Float8DynamicActivationFloat8WeightConfig())

        exported = torch.export.export(
            mod=pipeline.transformer,
            args=call.args,
            kwargs=call.kwargs,
            dynamic_shapes=dynamic_shapes,
        )

        return spaces.aoti_compile(exported, INDUCTOR_CONFIGS)

    spaces.aoti_apply(compile_transformer(), pipeline.transformer)
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ git+https://github.com/huggingface/diffusers.git
2
+
3
+ transformers
4
+ accelerate
5
+ safetensors
6
+ sentencepiece
7
+ dashscope
8
+ kernels
9
+ torchvision
10
+ peft
11
+ torchao==0.11.0