How to use Z-Image Turbo for image-to-image in Python

#10
by rockapaper - opened

How to use z image turbo for image to image in python

Install the prerequisites first:
pip install torch==2.9.1 diffusers gradio==6.3.0 nunchaku==1.2.0 Pillow

import warnings
# Filter PyTorch indexing warnings
warnings.filterwarnings('ignore', message='Using a non-tuple sequence for multidimensional indexing is deprecated')
warnings.filterwarnings('ignore', category=UserWarning, module='nunchaku.utils')

import torch
import gradio as gr
from diffusers.pipelines.z_image.pipeline_z_image import ZImagePipeline
from nunchaku import NunchakuZImageTransformer2DModel
from nunchaku.utils import get_precision, is_turing
from datetime import datetime
from pathlib import Path
from PIL import Image
import os

class ZImageGenerator:
    """Text-to-image generator wrapping the nunchaku-quantized Z-Image Turbo pipeline.

    The pipeline is loaded lazily, cached per SVDQuant rank, and kept resident
    on the GPU. Generated images are saved as timestamped WebP files under
    ``outputs/``.
    """

    def __init__(self):
        self.pipe = None        # loaded ZImagePipeline, or None until load_model()
        self.precision = None   # quantization precision reported by nunchaku
        self.rank = None        # SVDQuant rank of the currently loaded transformer
        self.dtype = None       # torch dtype selected per GPU architecture
        self.outputs_dir = Path("outputs")
        self.outputs_dir.mkdir(exist_ok=True)

        # Resolve the script's own directory so local model lookups do not
        # depend on the current working directory.
        self.script_dir = Path(__file__).parent.resolve()

    def get_local_model_path(self, rank):
        """Return the path of a locally available checkpoint for *rank*, or None.

        Search order: ComfyUI's diffusion_models directory, ./models, then the
        script directory itself.
        """
        precision = get_precision()
        model_filename = f"svdq-{precision}_r{rank}-z-image-turbo.safetensors"

        # Candidate locations, probed in priority order.
        candidates = (
            self.script_dir / "ComfyUI" / "models" / "diffusion_models" / model_filename,
            self.script_dir / "models" / model_filename,
            self.script_dir / model_filename,
        )
        for path in candidates:
            if path.exists():
                return str(path)
        return None

    def load_model(self, rank=128):
        """Load the pipeline for *rank* onto the GPU, reusing it if already loaded.

        Returns a human-readable status string for the UI.
        """
        if self.pipe is not None and self.rank == rank:
            return "Model already loaded and ready!"

        print(f"Loading model... (Rank: {rank})")
        # Drop any previously loaded pipeline and rank first, so that a failed
        # load cannot leave self.pipe/self.rank describing a stale model.
        self.pipe = None
        self.rank = None
        self.precision = get_precision()
        # Turing GPUs lack usable bfloat16 support, so fall back to float16.
        self.dtype = torch.float16 if is_turing() else torch.bfloat16

        # Prefer a local checkpoint; download from HuggingFace otherwise.
        local_model_path = self.get_local_model_path(rank)

        if local_model_path:
            print(f"Local model found: {local_model_path}")
            transformer = NunchakuZImageTransformer2DModel.from_pretrained(
                local_model_path,
                torch_dtype=self.dtype,
            )
        else:
            print("Local model not found, downloading from HuggingFace...")
            transformer = NunchakuZImageTransformer2DModel.from_pretrained(
                f"nunchaku-tech/nunchaku-z-image-turbo/svdq-{self.precision}_r{rank}-z-image-turbo.safetensors",
                torch_dtype=self.dtype,
            )

        # Build the full pipeline around the quantized transformer.
        pipe = ZImagePipeline.from_pretrained(
            "Tongyi-MAI/Z-Image-Turbo",
            transformer=transformer,
            torch_dtype=self.dtype,
            low_cpu_mem_usage=False,
        )

        # Keep the model resident in CUDA memory for subsequent requests.
        self.pipe = pipe.to("cuda")
        self.rank = rank  # only record the rank once loading fully succeeded

        return f"Model loaded successfully! (Precision: {self.precision}, Rank: {self.rank}, Dtype: {self.dtype})"

    def generate_image(self, prompt, height, width, num_steps, seed, rank):
        """Generate one image, yielding (image, status) pairs for streaming UIs."""
        try:
            # (Re)load inside the try so download/load failures surface in the
            # status box instead of crashing the Gradio event handler.
            if self.pipe is None or self.rank != rank:
                status = self.load_model(rank)
                yield None, status

            # Gradio's Number component may deliver the seed as a float;
            # torch.Generator.manual_seed requires an int.
            generator = torch.Generator(device="cuda").manual_seed(int(seed))

            yield None, "Generating image..."

            image = self.pipe(
                prompt=prompt,
                height=height,
                width=width,
                num_inference_steps=num_steps,
                guidance_scale=0.0,  # Must be 0 for Turbo models
                generator=generator,
            ).images[0]

            # Timestamped filename, e.g. 20250112_143052.webp
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filepath = self.outputs_dir / f"{timestamp}.webp"

            image.save(filepath, "WEBP", quality=95)

            # Free transient CUDA allocations; the model itself stays resident.
            torch.cuda.empty_cache()
            torch.cuda.synchronize()

            status = f"βœ… Image generated and saved successfully!\nFile: {filepath}"
            yield image, status

        except Exception as e:
            # Clean up GPU memory even on failure before reporting the error.
            torch.cuda.empty_cache()
            yield None, f"❌ Error occurred: {str(e)}"

# Shared generator instance — keeps the loaded model resident in CUDA memory.
generator = ZImageGenerator()

def generate_wrapper(prompt, height, width, num_steps, seed, rank):
    """Stream (image, status) updates from the shared generator to Gradio."""
    yield from generator.generate_image(prompt, height, width, num_steps, seed, rank)

# Gradio interface: two-column layout with controls on the left and the
# rendered image on the right. `demo` is launched in the __main__ guard below.
with gr.Blocks(title="Z-Image Turbo Generator") as demo:
    gr.Markdown("""
    # 🎨 Z-Image Turbo Generator
    Generate high-quality images with Z-Image Turbo. Models are kept in CUDA memory until the script is closed.
    """)
    
    with gr.Row():
        # Left column: all generation inputs.
        with gr.Column(scale=1):
            prompt_input = gr.Textbox(
                label="Prompt",
                placeholder="Enter image description...",
                lines=3,
                value="a young military male cooking in the kitchen for therapy"
            )
            
            # Width/height constrained to multiples of 64 between 512 and 2048.
            with gr.Row():
                width_slider = gr.Slider(
                    minimum=512,
                    maximum=2048,
                    step=64,
                    value=1024,
                    label="Width"
                )
                height_slider = gr.Slider(
                    minimum=512,
                    maximum=2048,
                    step=64,
                    value=1024,
                    label="Height"
                )
            
            with gr.Row():
                steps_slider = gr.Slider(
                    minimum=4,
                    maximum=20,
                    step=1,
                    value=8,
                    label="Inference Steps"
                )
                # precision=0 makes the Number component return an integer seed.
                seed_input = gr.Number(
                    label="Seed",
                    value=12345,
                    precision=0
                )
            
            # Changing the rank triggers a model reload in generate_image().
            rank_radio = gr.Radio(
                choices=[32, 128, 256],
                value=128,
                label="Rank (32: Fast, 128: Balanced, 256: Best Quality - INT4 only)",
                info="Model will reload if rank is changed"
            )
            
            generate_btn = gr.Button("πŸš€ Generate Image", variant="primary", size="lg")
            
            # Read-only status box fed by the streaming generator.
            status_text = gr.Textbox(
                label="Status",
                interactive=False,
                lines=3
            )
        
        # Right column: the generated image.
        with gr.Column(scale=1):
            output_image = gr.Image(
                label="Generated Image",
                type="pil",
                height=600
            )
    
    gr.Markdown("""
    ### πŸ“ Notes:
    - Images are saved to the `outputs/` folder in `.webp` format
    - Filenames use date_time format (e.g., 20250112_143052.webp)
    - Models stay in CUDA memory and are not deleted until script is closed
    - CUDA memory cleanup is performed after each image generation (except model)
    - Guidance scale is automatically set to 0.0 for Turbo models
    - App runs on 127.0.0.1:7860
    
    ### πŸ“‚ Model Loading:
    The app searches for models in the following order:
    1. `./ComfyUI/models/diffusion_models/svdq-{precision}_r{rank}-z-image-turbo.safetensors`
    2. `./models/svdq-{precision}_r{rank}-z-image-turbo.safetensors`
    3. `./svdq-{precision}_r{rank}-z-image-turbo.safetensors` (script directory)
    4. Downloads from HuggingFace if not found locally
    """)
    
    # Wire the button to the streaming wrapper; input order matches the
    # generate_wrapper(prompt, height, width, num_steps, seed, rank) signature.
    generate_btn.click(
        fn=generate_wrapper,
        inputs=[prompt_input, height_slider, width_slider, steps_slider, seed_input, rank_radio],
        outputs=[output_image, status_text]
    )

# Launch app
if __name__ == "__main__":
    # NOTE(review): `theme` is a gr.Blocks()/gr.Interface() constructor
    # argument, not a Blocks.launch() parameter — passing it to launch()
    # raises TypeError on current Gradio. To apply the theme, construct the
    # UI with gr.Blocks(theme=gr.themes.Soft(), ...) instead.
    demo.queue().launch(
        server_name="127.0.0.1",  # Bind to localhost only
        server_port=7860,
        share=False,
        inbrowser=True,  # Auto-open in browser
    )

Install the prerequisites first:
pip install torch==2.9.1 diffusers gradio==6.3.0 nunchaku==1.2.0 Pillow

import warnings
# Filter PyTorch indexing warnings
warnings.filterwarnings('ignore', message='Using a non-tuple sequence for multidimensional indexing is deprecated')
warnings.filterwarnings('ignore', category=UserWarning, module='nunchaku.utils')

import torch
import gradio as gr
from diffusers.pipelines.z_image.pipeline_z_image import ZImagePipeline
from nunchaku import NunchakuZImageTransformer2DModel
from nunchaku.utils import get_precision, is_turing
from datetime import datetime
from pathlib import Path
from PIL import Image
import os

class ZImageGenerator:
    """Keeps a Z-Image Turbo pipeline resident on the GPU and renders images.

    Loading is lazy and cached per SVDQuant rank; finished images are written
    to the ``outputs/`` directory as timestamped WebP files.
    """

    def __init__(self):
        self.pipe = None
        self.precision = None
        self.rank = None
        self.dtype = None
        self.outputs_dir = Path("outputs")
        self.outputs_dir.mkdir(exist_ok=True)
        # Anchor model lookups to the script's own directory, not the CWD.
        self.script_dir = Path(__file__).parent.resolve()

    def get_local_model_path(self, rank):
        """Return a local checkpoint path for *rank*, or None if none exists."""
        precision = get_precision()
        fname = f"svdq-{precision}_r{rank}-z-image-turbo.safetensors"

        # Probe the usual locations in priority order.
        search_locations = (
            self.script_dir / "ComfyUI" / "models" / "diffusion_models" / fname,
            self.script_dir / "models" / fname,
            self.script_dir / fname,
        )
        for candidate in search_locations:
            if candidate.exists():
                return str(candidate)
        return None

    def load_model(self, rank=128):
        """Load (or reuse) the pipeline for *rank* and keep it on the GPU."""
        if self.pipe is not None and self.rank == rank:
            return "Model already loaded and ready!"

        print(f"Loading model... (Rank: {rank})")
        self.precision = get_precision()
        self.rank = rank
        # Turing GPUs lack usable bfloat16 support; use float16 there.
        self.dtype = torch.float16 if is_turing() else torch.bfloat16

        local_path = self.get_local_model_path(rank)

        if not local_path:
            print("Local model not found, downloading from HuggingFace...")
            remote_ref = (
                "nunchaku-tech/nunchaku-z-image-turbo/"
                f"svdq-{self.precision}_r{self.rank}-z-image-turbo.safetensors"
            )
            transformer = NunchakuZImageTransformer2DModel.from_pretrained(
                remote_ref, torch_dtype=self.dtype
            )
        else:
            print(f"Local model found: {local_path}")
            transformer = NunchakuZImageTransformer2DModel.from_pretrained(
                local_path, torch_dtype=self.dtype
            )

        # Assemble the pipeline and move it onto the GPU, where it stays.
        self.pipe = ZImagePipeline.from_pretrained(
            "Tongyi-MAI/Z-Image-Turbo",
            transformer=transformer,
            torch_dtype=self.dtype,
            low_cpu_mem_usage=False,
        ).to("cuda")

        return (
            f"Model loaded successfully! "
            f"(Precision: {self.precision}, Rank: {self.rank}, Dtype: {self.dtype})"
        )

    def generate_image(self, prompt, height, width, num_steps, seed, rank):
        """Generator yielding (image, status) pairs for streaming into Gradio."""
        if self.pipe is None or self.rank != rank:
            yield None, self.load_model(rank)

        try:
            rng = torch.Generator(device="cuda").manual_seed(seed)

            yield None, "Generating image..."

            result = self.pipe(
                prompt=prompt,
                height=height,
                width=width,
                num_inference_steps=num_steps,
                guidance_scale=0.0,  # Turbo models require zero guidance
                generator=rng,
            )
            image = result.images[0]

            # Timestamped filename, e.g. 20250112_143052.webp
            stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filepath = self.outputs_dir / f"{stamp}.webp"
            image.save(filepath, "WEBP", quality=95)

            # Release transient allocations; the model itself stays loaded.
            torch.cuda.empty_cache()
            torch.cuda.synchronize()

            yield image, f"βœ… Image generated and saved successfully!\nFile: {filepath}"

        except Exception as e:
            # Free what we can before surfacing the error to the UI.
            torch.cuda.empty_cache()
            yield None, f"❌ Error occurred: {str(e)}"

# Module-level generator shared across requests (keeps the model on the GPU).
generator = ZImageGenerator()

def generate_wrapper(prompt, height, width, num_steps, seed, rank):
    """Gradio entry point: forward every (image, status) pair as produced."""
    stream = generator.generate_image(prompt, height, width, num_steps, seed, rank)
    for img, msg in stream:
        yield img, msg

# Gradio UI definition. Controls live in the left column, the output image in
# the right; `demo` is served from the __main__ block at the bottom of the file.
with gr.Blocks(title="Z-Image Turbo Generator") as demo:
    gr.Markdown("""
    # 🎨 Z-Image Turbo Generator
    Generate high-quality images with Z-Image Turbo. Models are kept in CUDA memory until the script is closed.
    """)
    
    with gr.Row():
        # Generation controls.
        with gr.Column(scale=1):
            prompt_input = gr.Textbox(
                label="Prompt",
                placeholder="Enter image description...",
                lines=3,
                value="a young military male cooking in the kitchen for therapy"
            )
            
            # Dimensions snap to multiples of 64 in the 512–2048 range.
            with gr.Row():
                width_slider = gr.Slider(
                    minimum=512,
                    maximum=2048,
                    step=64,
                    value=1024,
                    label="Width"
                )
                height_slider = gr.Slider(
                    minimum=512,
                    maximum=2048,
                    step=64,
                    value=1024,
                    label="Height"
                )
            
            with gr.Row():
                steps_slider = gr.Slider(
                    minimum=4,
                    maximum=20,
                    step=1,
                    value=8,
                    label="Inference Steps"
                )
                # precision=0 asks the Number widget for an integer value.
                seed_input = gr.Number(
                    label="Seed",
                    value=12345,
                    precision=0
                )
            
            # Rank selection; generate_image() reloads the model on change.
            rank_radio = gr.Radio(
                choices=[32, 128, 256],
                value=128,
                label="Rank (32: Fast, 128: Balanced, 256: Best Quality - INT4 only)",
                info="Model will reload if rank is changed"
            )
            
            generate_btn = gr.Button("πŸš€ Generate Image", variant="primary", size="lg")
            
            # Status messages streamed from the generator.
            status_text = gr.Textbox(
                label="Status",
                interactive=False,
                lines=3
            )
        
        # Output display.
        with gr.Column(scale=1):
            output_image = gr.Image(
                label="Generated Image",
                type="pil",
                height=600
            )
    
    gr.Markdown("""
    ### πŸ“ Notes:
    - Images are saved to the `outputs/` folder in `.webp` format
    - Filenames use date_time format (e.g., 20250112_143052.webp)
    - Models stay in CUDA memory and are not deleted until script is closed
    - CUDA memory cleanup is performed after each image generation (except model)
    - Guidance scale is automatically set to 0.0 for Turbo models
    - App runs on 127.0.0.1:7860
    
    ### πŸ“‚ Model Loading:
    The app searches for models in the following order:
    1. `./ComfyUI/models/diffusion_models/svdq-{precision}_r{rank}-z-image-turbo.safetensors`
    2. `./models/svdq-{precision}_r{rank}-z-image-turbo.safetensors`
    3. `./svdq-{precision}_r{rank}-z-image-turbo.safetensors` (script directory)
    4. Downloads from HuggingFace if not found locally
    """)
    
    # Click handler; input order mirrors generate_wrapper's parameter order
    # (prompt, height, width, num_steps, seed, rank).
    generate_btn.click(
        fn=generate_wrapper,
        inputs=[prompt_input, height_slider, width_slider, steps_slider, seed_input, rank_radio],
        outputs=[output_image, status_text]
    )

# Launch app
if __name__ == "__main__":
    # NOTE(review): launch() does not accept a `theme` keyword — themes are
    # configured on the gr.Blocks(...) constructor. Passing theme here raises
    # TypeError on current Gradio, so it has been removed from this call.
    demo.queue().launch(
        server_name="127.0.0.1",  # Localhost only
        server_port=7860,
        share=False,
        inbrowser=True,  # Auto-open in browser
    )

Buddy, this is for text-to-image (t2i) — I was asking about image-to-image (i2i).

Sign up or log in to comment