# Spaces: techfreakworm / z-image-studio
# Running on Zero
# File size: 8,290 Bytes

from unittest.mock import MagicMock

import pytest
from PIL import Image

import modes


@pytest.fixture
def fake_pipe():
    """Provide a mock pipeline for the mode tests.

    The mock is a ``MagicMock``, so every call made on it is recorded and can
    be inspected through ``call_args``. Calling it returns a fixed 64x64 RGB
    image. ``dit`` and ``model_pool`` are pre-populated with their own mocks.
    """
    dummy_image = Image.new("RGB", (64, 64), color=(255, 176, 46))
    stub = MagicMock(return_value=dummy_image)
    stub.dit = MagicMock()
    stub.model_pool = MagicMock()
    return stub


def test_t2i_turbo_builds_minimal_call(fake_pipe):
    """Check the single pipeline call that ``call_t2i`` makes for a Turbo request.

    The pipeline must be invoked exactly once with the prompt, CFG scale, step
    count, width, seed and a ``sigma_shift`` of 3.0, and must not receive a
    non-empty negative prompt. The returned metadata must echo the model and
    step count, and the returned output must be a PIL image.
    """
    request = {
        "prompt": "a cat",
        "negative_prompt": "",
        "model": "Turbo",
        "steps": 8,
        "cfg": 1.0,
        "width": 1024,
        "height": 1024,
        "seed": 42,
        "lora_path": None,
        "lora_strength": 0.0,
    }

    image, info = modes.call_t2i(fake_pipe, params=request)

    fake_pipe.assert_called_once()
    sent = fake_pipe.call_args.kwargs
    expected_kwargs = {
        "prompt": "a cat",
        "cfg_scale": 1.0,
        "num_inference_steps": 8,
        "width": 1024,
        "seed": 42,
        "sigma_shift": 3.0,
    }
    for name, wanted in expected_kwargs.items():
        assert sent[name] == wanted
    # Either omitting the key or passing an empty value is acceptable.
    assert not sent.get("negative_prompt")
    assert info["model"] == "Turbo"
    assert info["steps"] == 8
    assert isinstance(image, Image.Image)


def test_t2i_base_passes_negative_prompt_and_cfg4(fake_pipe):
    """Check that a Base request forwards its negative prompt, CFG scale and steps."""
    request = {
        "prompt": "a cat",
        "negative_prompt": "blurry, lowres",
        "model": "Base",
        "steps": 25,
        "cfg": 4.0,
        "width": 1024,
        "height": 1024,
        "seed": 42,
        "lora_path": None,
        "lora_strength": 0.0,
    }

    modes.call_t2i(fake_pipe, params=request)

    sent = fake_pipe.call_args.kwargs
    assert sent["negative_prompt"] == "blurry, lowres"
    assert sent["cfg_scale"] == 4.0
    assert sent["num_inference_steps"] == 25


def test_t2i_swaps_transformer_via_pool_index(fake_pipe):
    """Check that ``call_t2i`` assigns ``pipe.dit`` from the pool by load order.

    The pool holds two ``z_image_dit`` entries, Base first and Turbo second,
    followed by a VAE decoder entry. A Base request must leave ``pipe.dit``
    pointing at the first entry, and a subsequent Turbo request at the second.
    """
    dit_for_base, dit_for_turbo = object(), object()
    pool = fake_pipe._zis_pool
    pool.model = [dit_for_base, dit_for_turbo, "vae_decoder_obj"]
    pool.model_name = ["z_image_dit", "z_image_dit", "flux_vae_decoder"]

    # (model, steps, cfg, transformer expected on the pipe afterwards)
    scenarios = (
        ("Base", 25, 4.0, dit_for_base),
        ("Turbo", 8, 1.0, dit_for_turbo),
    )
    for model, steps, cfg, expected_dit in scenarios:
        modes.call_t2i(
            fake_pipe,
            params={
                "prompt": "x",
                "negative_prompt": "",
                "model": model,
                "steps": steps,
                "cfg": cfg,
                "width": 1024,
                "height": 1024,
                "seed": 0,
                "lora_path": None,
                "lora_strength": 0.0,
            },
        )
        assert fake_pipe.dit is expected_dit


def test_controlnet_calls_preprocessor_then_pipeline(fake_pipe, monkeypatch):
    """Check that ``call_controlnet`` runs the preprocessor and then the pipeline.

    ``modes.preprocessors`` is replaced by a stub whose ``run`` records each
    invocation and returns the image unchanged. The stub must be called once
    with the selected mode and the input image, and the pipeline must receive
    one controlnet input with the requested scale.
    """
    preprocessor_calls = []

    def record_and_passthrough(mode, img):
        preprocessor_calls.append((mode, img.size))
        # Hand the image back untouched; only the invocation matters here.
        return img

    stub_preprocessors = type("P", (), {"run": staticmethod(record_and_passthrough)})
    monkeypatch.setattr(modes, "preprocessors", stub_preprocessors)

    source_image = Image.new("RGB", (1024, 1024))
    request = {
        "prompt": "cinematic portrait",
        "input_image": source_image,
        "preprocessor": "Canny",
        "controlnet_scale": 1.0,
        "steps": 9,
        "seed": 42,
        "lora_path": None,
        "lora_strength": 0.0,
    }
    _out, info = modes.call_controlnet(fake_pipe, params=request)

    assert preprocessor_calls == [("Canny", (1024, 1024))]
    sent = fake_pipe.call_args.kwargs
    assert "controlnet_inputs" in sent
    controlnet_inputs = sent["controlnet_inputs"]
    assert len(controlnet_inputs) == 1
    assert controlnet_inputs[0].scale == 1.0
    assert sent["num_inference_steps"] == 9
    assert sent["cfg_scale"] == 1.0
    assert info["preprocessor"] == "Canny"


def test_controlnet_rejects_missing_input_image(fake_pipe):
    """Check that ``call_controlnet`` raises ``ValueError`` when ``input_image`` is None."""
    request = {
        "prompt": "x",
        "input_image": None,
        "preprocessor": "Canny",
        "controlnet_scale": 1.0,
        "steps": 9,
        "seed": 0,
        "lora_path": None,
        "lora_strength": 0.0,
    }
    with pytest.raises(ValueError):
        modes.call_controlnet(fake_pipe, params=request)


def test_upscale_runs_realesrgan_then_pipeline(fake_pipe, monkeypatch):
    """Check that ``call_upscale`` upscales first and refines the result second.

    ``modes.upscale`` is replaced by a stub whose ``realesrgan_2x`` records its
    arguments and returns the image resized to twice its dimensions. The stub
    must see the original image and model path, and the pipeline must receive
    the enlarged image together with the refine settings.
    """
    last_upscale_call = None

    def double_size(img, model_path):
        nonlocal last_upscale_call
        last_upscale_call = (img.size, str(model_path))
        width, height = img.size
        return img.resize((width * 2, height * 2), Image.LANCZOS)

    stub_upscale = type("U", (), {"realesrgan_2x": staticmethod(double_size)})
    monkeypatch.setattr(modes, "upscale", stub_upscale)

    source_image = Image.new("RGB", (512, 512))
    request = {
        "prompt": "masterpiece, 8k",
        "input_image": source_image,
        "refine_steps": 5,
        "refine_denoise": 0.33,
        "seed": 42,
        "esrgan_model_path": "/fake/path/RealESRGAN_x4plus.pth",
    }
    _out, info = modes.call_upscale(fake_pipe, params=request)

    assert last_upscale_call == ((512, 512), "/fake/path/RealESRGAN_x4plus.pth")
    sent = fake_pipe.call_args.kwargs
    assert sent["input_image"].size == (1024, 1024)  # doubled by the stub
    assert sent["denoising_strength"] == 0.33
    assert sent["num_inference_steps"] == 5
    assert sent["cfg_scale"] == 1.0
    # The explicit width/height have to describe the upscaled image; otherwise
    # add_noise fails on mismatched input_latents and noise shapes.
    assert sent["width"] == 1024
    assert sent["height"] == 1024
    assert info["mode"] == "upscale"


def test_upscale_crops_to_multiple_of_16(fake_pipe, monkeypatch):
    """Regression test: upscaled images are cropped down to multiples of 16.

    An upscaled image whose sides were not aligned to 16 used to crash the
    pipeline in add_noise: DiffSynth rounds height/width *up* to a multiple of
    16 for the noise tensor, while its VAE rounds *down* for the encoded
    latents. Cropping to a multiple of 16 beforehand makes both shapes agree.
    """

    def unaligned_upscale(img, model_path):
        # Neither 1240 nor 728 is divisible by 16.
        return Image.new("RGB", (1240, 728))

    stub_upscale = type("U", (), {"realesrgan_2x": staticmethod(unaligned_upscale)})
    monkeypatch.setattr(modes, "upscale", stub_upscale)

    request = {
        "prompt": "masterpiece, 8k",
        "input_image": Image.new("RGB", (620, 364)),
        "refine_steps": 5,
        "refine_denoise": 0.33,
        "seed": 0,
        "esrgan_model_path": "/fake/path/RealESRGAN_x4plus.pth",
    }
    _out, info = modes.call_upscale(fake_pipe, params=request)

    sent = fake_pipe.call_args.kwargs
    cropped_width = 1232  # 1240 // 16 * 16
    cropped_height = 720  # 728 // 16 * 16
    assert sent["width"] == cropped_width
    assert sent["height"] == cropped_height
    assert sent["input_image"].size == (1232, 720)
    assert info["width"] == cropped_width
    assert info["height"] == cropped_height


def test_upscale_rejects_missing_image(fake_pipe):
    """Check that ``call_upscale`` raises ``ValueError`` when ``input_image`` is None."""
    request = {
        "prompt": "x",
        "input_image": None,
        "refine_steps": 5,
        "refine_denoise": 0.33,
        "seed": 0,
        "esrgan_model_path": "/fake.pth",
    }
    with pytest.raises(ValueError):
        modes.call_upscale(fake_pipe, params=request)


def test_controlnet_falls_back_when_preprocessor_raises(fake_pipe, monkeypatch):
    """Check that a failing preprocessor does not stop ``call_controlnet``.

    ``modes.preprocessors.run`` is stubbed to raise ``RuntimeError``. The
    pipeline must still be called, and its controlnet input must carry the
    original, unprocessed image object.
    """

    def always_fails(mode, img):
        raise RuntimeError("preprocessor exploded")

    stub_preprocessors = type("P", (), {"run": staticmethod(always_fails)})
    monkeypatch.setattr(modes, "preprocessors", stub_preprocessors)

    source_image = Image.new("RGB", (512, 512))
    request = {
        "prompt": "x",
        "input_image": source_image,
        "preprocessor": "Canny",
        "controlnet_scale": 1.0,
        "steps": 9,
        "seed": 0,
        "lora_path": None,
        "lora_strength": 0.0,
    }
    _out, _meta = modes.call_controlnet(fake_pipe, params=request)

    # The pipeline was still invoked, with the raw input standing in for the
    # preprocessed image.
    controlnet_inputs = fake_pipe.call_args.kwargs["controlnet_inputs"]
    assert controlnet_inputs[0].image is source_image