import gc
import os

import numpy as np
import torch
from diffusers import (
    QwenImageInpaintPipeline,
    QwenImagePipeline,
    QwenImageTransformer2DModel,
)
from optimum.quanto import freeze, qint8, quantize
from PIL import Image
from transformers import Qwen2_5_VLForConditionalGeneration
|
|
|
|
# --- Generation settings -------------------------------------------------
prompt = (
    "equirectangular, a woman and a man sitting at a cafe, the woman has red hair "
    "and she's wearing purple sweater with a black scarf and a white hat, the man "
    "is sitting on the other side of the table and he's wearing a white shirt with "
    "a purple scarf and red hat, both of them are sipping their coffee while in the "
    "table there's some cake slices on their respective plates, each with forks and "
    "knives at each side."
)
negative_prompt = ""
output_filename = "qwen_int8.png"
width = 2048
height = 1024
true_cfg_scale = 4.0
num_inference_steps = 25
seed = 42

# --- LoRA checkpoint (repo id and weight file passed to load_lora_weights) -
lora_model_id = "ProGamerGov/qwen-360-diffusion"
lora_filename = "qwen-360-diffusion-int8-bf16-v1.safetensors"

# --- Base model and runtime ----------------------------------------------
model_id = "Qwen/Qwen-Image"
torch_dtype = torch.bfloat16
device = "cuda"

# --- Seam-fix pass ---------------------------------------------------------
# When enabled, main() runs a second inpainting pass over the wrap-around seam.
fix_seam = True
inpaint_strength = 0.5
seam_width = 0.10  # passed to create_seam_mask as ``frac``
|
|
|
|
def shift_equirect(img):
    """Roll an equirectangular image horizontally by half its width.

    The shift wraps around, so the columns that were at the left and right
    borders of ``img`` end up adjacent in the middle of the result. For an
    even width, applying the function twice restores the original image.

    Args:
        img: A PIL image. ``np.array(img)`` must yield an array whose second
            axis is the width, i.e. shape ``(H, W)`` or ``(H, W, C)``.

    Returns:
        A new PIL image with every row rolled right by ``width // 2`` pixels.
    """
    pixels = np.array(img)
    # Roll the integer pixel values directly. Going through float (/255, *255)
    # and a truncating uint8 cast is unnecessary for a pure permutation of
    # columns and risks off-by-one pixel values from rounding.
    rolled = np.roll(pixels, pixels.shape[1] // 2, axis=1)
    return Image.fromarray(rolled)
|
|
|
|
def create_seam_mask(w, h, frac=0.10):
    """Build a mask image with a vertical band around the centre column.

    Args:
        w: Mask width in pixels.
        h: Mask height in pixels.
        frac: Band width as a fraction of ``w``. The band is always at least
            one pixel wide and never extends past the image borders.

    Returns:
        A mode ``"L"`` PIL image of size ``(w, h)`` that is 255 inside the
        band and 0 everywhere else.
    """
    seam_w = max(1, int(w * frac))
    # Anchor the band on its left edge and add the full width. Halving the
    # width on both sides would drop a column for odd widths and produce an
    # empty band for the 1-pixel minimum.
    left = max(0, w // 2 - seam_w // 2)
    # Clamp so an oversized band covers the whole image instead of producing
    # a negative (wrapping) slice index.
    right = min(w, left + seam_w)

    mask = np.zeros((h, w), dtype=np.uint8)
    mask[:, left:right] = 255
    # A 2-D uint8 array is interpreted by Pillow as a mode "L" image.
    return Image.fromarray(mask)
|
|
|
|
def load_pipeline(text_encoder, transformer, mode="t2i"):
    """Assemble a Qwen-Image pipeline around already-loaded components.

    Args:
        text_encoder: Text encoder instance handed to ``from_pretrained``.
        transformer: Transformer instance handed to ``from_pretrained``.
        mode: ``"t2i"`` selects ``QwenImagePipeline``; any other value
            selects ``QwenImageInpaintPipeline``.

    Returns:
        The pipeline with the LoRA weights loaded and with model CPU offload
        and VAE tiling enabled.
    """
    if mode == "t2i":
        pipeline_cls = QwenImagePipeline
    else:
        pipeline_cls = QwenImageInpaintPipeline

    load_kwargs = {
        "transformer": transformer,
        "text_encoder": text_encoder,
        "torch_dtype": torch_dtype,
        "use_safetensors": True,
        "low_cpu_mem_usage": True,
    }
    pipeline = pipeline_cls.from_pretrained(model_id, **load_kwargs)

    # Same order as before: LoRA first, then the memory-saving switches.
    pipeline.load_lora_weights(lora_model_id, weight_name=lora_filename)
    pipeline.enable_model_cpu_offload()
    pipeline.enable_vae_tiling()
    return pipeline
|
|
|
|
def _load_int8(model_cls, subfolder, **extra_kwargs):
    """Load one component of ``model_id`` and quantize its weights to int8."""
    model = model_cls.from_pretrained(
        model_id,
        subfolder=subfolder,
        torch_dtype=torch_dtype,
        low_cpu_mem_usage=True,
        **extra_kwargs,
    )
    quantize(model, weights=qint8)
    freeze(model)
    return model


def main():
    """Generate the panorama and, if ``fix_seam`` is set, repair its seam.

    Steps:
      1. Load the transformer and text encoder and quantize both to int8.
      2. Run the text-to-image pipeline and save the result to
         ``output_filename``.
      3. If ``fix_seam`` is true: roll the image by half its width so the
         wrap-around seam sits in the centre, inpaint a vertical band over
         it, roll back, and save next to the first output with a
         ``_seamfix`` suffix inserted before the file extension.
    """
    transformer = _load_int8(QwenImageTransformer2DModel, "transformer")
    text_encoder = _load_int8(
        Qwen2_5_VLForConditionalGeneration,
        "text_encoder",
        device_map={"": "cpu"},
    )

    generator = torch.Generator(device=device).manual_seed(seed)
    pipe = load_pipeline(text_encoder, transformer, mode="t2i")

    # Arguments shared by the text-to-image and the inpainting call.
    common_kwargs = {
        "prompt": prompt,
        "negative_prompt": negative_prompt,
        "width": width,
        "height": height,
        "num_inference_steps": num_inference_steps,
        "true_cfg_scale": true_cfg_scale,
        # The same generator object is used for both passes, so the second
        # pass continues from the RNG state left by the first.
        "generator": generator,
    }

    image = pipe(**common_kwargs).images[0]
    image.save(output_filename)

    if not fix_seam:
        return

    # Drop the first pipeline before building the second one. ``del`` only
    # removes this reference; collect cyclic garbage as well so that
    # empty_cache() can actually return the freed blocks.
    del pipe
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # Move the left/right border into the middle, where the mask is.
    shifted = shift_equirect(image)
    mask = create_seam_mask(width, height, frac=seam_width)

    # NOTE(review): load_pipeline loads the LoRA onto this same transformer
    # object a second time - confirm the adapter is not applied twice.
    pipe = load_pipeline(text_encoder, transformer, mode="i2i")
    image_fixed = pipe(
        image=shifted,
        mask_image=mask,
        strength=inpaint_strength,
        **common_kwargs,
    ).images[0]
    # Undo the half-width roll so the result lines up with the first output.
    image_fixed = shift_equirect(image_fixed)

    # Insert the suffix before the extension. A plain ".png" string replace
    # is a no-op for other extensions and would overwrite the first image.
    root, ext = os.path.splitext(output_filename)
    image_fixed.save(f"{root}_seamfix{ext}")
|
|
|
|
# Run the generation only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
|