Spaces:

techfreakworm
/

z-image-studio

Running on Zero

App Files Files Community

techfreakworm committed 8 days ago

Commit

613dab3

unverified ·

1 Parent(s): fd0ad15

feat(models): device autodetect, vram-limit helpers, model config registry

Browse files

Files changed (2) hide show

models.py +96 -0
tests/test_models.py +38 -0

models.py ADDED Viewed

	@@ -0,0 +1,96 @@

+"""Device autodetect, ZImagePipeline ModelConfig registry, and (Task 4) HF cache mirror."""
+from __future__ import annotations
+import os
+from dataclasses import dataclass, field
+from typing import Any
+# Avoid importing torch at module load — keeps `import models` fast in CI.
+def on_spaces() -> bool:
+    """Report whether ``SPACES_ZERO_GPU`` is present in the environment with a non-empty value.
+    That variable is what this module uses to recognise a Hugging Face ZeroGPU Space.
+    """
+    zero_gpu_flag = os.environ.get("SPACES_ZERO_GPU", "")
+    return zero_gpu_flag != ""
+def auto_device() -> str:
+    """Pick a compute device name: ``"cuda"`` first, then ``"mps"``, otherwise ``"cpu"``."""
+    # torch is imported here, not at module level, so `import models` stays cheap.
+    import torch
+    if torch.cuda.is_available():
+        chosen = "cuda"
+    elif torch.backends.mps.is_available():
+        chosen = "mps"
+    else:
+        chosen = "cpu"
+    return chosen
+def vram_limit_for(device: str, free_gb: float | None = None) -> float:
+    """Conservative VRAM limit (GB) passed to DiffSynth's vram_management.
+    - CPU: 0.0 (no offload budget).
+    - MPS: half of the memory figure (the CPU still needs its share of unified memory).
+    - CUDA: the memory figure minus 4 GB of headroom (loaded models + scratch).
+    For MPS and CUDA the budget is raised to at least 8 GB, but it never exceeds
+    ``free_gb`` itself, so a small device is not handed a limit larger than the
+    memory it actually has.
+    Args:
+        device: ``"cpu"``, ``"mps"`` or ``"cuda"``. Any other string falls through to
+            the CUDA formula (and to the 24 GB assumption when ``free_gb`` is omitted).
+        free_gb: Device memory in GB. When omitted it is probed: the total reported by
+            ``torch.cuda.mem_get_info()`` for CUDA, or a fixed 24 GB assumption otherwise.
+    Returns:
+        The limit in GB.
+    """
+    if device == "cpu":
+        return 0.0
+    if free_gb is None:
+        # Imported lazily so that importing this module does not pull in torch.
+        import torch
+        if device == "cuda":
+            # mem_get_info() returns (free, total) in bytes; index 1 is the device total.
+            free_gb = torch.cuda.mem_get_info()[1] / (1024 ** 3)
+        else:  # mps
+            # torch.mps has no mem_get_info on most builds; fall back to a safe constant.
+            free_gb = 24.0
+    budget = free_gb / 2 if device == "mps" else free_gb - 4.0
+    # Apply the 8 GB floor first, then clamp to what the device really has: the floor
+    # alone would report 8 GB on, say, a 6 GB card.
+    return min(free_gb, max(8.0, budget))
+@dataclass(frozen=True)
+class ModelConfig:
+    """Lightweight, immutable wrapper around DiffSynth's ModelConfig.
+    Stored as plain data so this module imports cheaply in CI. The real
+    ``diffsynth.core.ModelConfig`` instance is built on demand by
+    :func:`build_diffsynth_configs`, which forwards ``model_id`` and
+    ``origin_file_pattern`` and ignores ``description``.
+    """
+    # Model repository id, e.g. "Tongyi-MAI/Z-Image".
+    model_id: str
+    # File path or glob selecting files of that model, e.g. "transformer/*.safetensors"
+    # (presumably relative to the repository root — confirm against DiffSynth).
+    origin_file_pattern: str
+    # Optional human-readable note; defaults to the empty string.
+    description: str = ""
+# Registry of the model files this app loads, in load order.
+MODEL_CONFIGS: tuple[ModelConfig, ...] = (
+    # Base
+    ModelConfig(
+        model_id="Tongyi-MAI/Z-Image",
+        origin_file_pattern="transformer/*.safetensors",
+        description="Z-Image base transformer (25 steps, cfg=4)",
+    ),
+    ModelConfig(
+        model_id="Tongyi-MAI/Z-Image",
+        origin_file_pattern="text_encoder/*.safetensors",
+        description="Qwen3-4B text encoder — shared between base + turbo",
+    ),
+    ModelConfig(
+        model_id="Tongyi-MAI/Z-Image",
+        origin_file_pattern="vae/diffusion_pytorch_model.safetensors",
+        description="Flux-family VAE — shared between base + turbo",
+    ),
+    # Turbo (transformer only — encoder + VAE come from the Z-Image entry above)
+    ModelConfig(
+        model_id="Tongyi-MAI/Z-Image-Turbo",
+        origin_file_pattern="transformer/*.safetensors",
+        description="Z-Image-Turbo transformer (8 steps, cfg=1)",
+    ),
+    # ControlNet Union 2.1 (eager preload per spec; can move to lazy if RAM is tight)
+    ModelConfig(
+        model_id="PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1",
+        origin_file_pattern="Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.safetensors",
+        description="ControlNet Union 2.1 — canny/depth/pose",
+    ),
+)
+# Kept outside MODEL_CONFIGS, so it is not part of the default build_diffsynth_configs() input.
+TOKENIZER_CONFIG = ModelConfig(
+    model_id="Tongyi-MAI/Z-Image",
+    origin_file_pattern="tokenizer/",
+    description="Qwen3-4B tokenizer",
+)
+def build_diffsynth_configs(
+    configs: tuple[ModelConfig, ...] = MODEL_CONFIGS,
+    vram_cfg: dict[str, Any] | None = None,
+) -> list[Any]:
+    """Turn the lightweight ``ModelConfig`` records into DiffSynth ``ModelConfig`` objects.
+    Meant to run at app boot rather than at module import. Each record contributes
+    its ``model_id`` and ``origin_file_pattern``; the entries of ``vram_cfg`` (the
+    disk-offload block — offload_dtype, offload_device, etc. — used by DiffSynth's
+    low-VRAM examples) are passed to every constructor call as extra keyword arguments.
+    Returns one DiffSynth config per input record, in the same order.
+    """
+    # Imported here rather than at module level so importing this module does not need diffsynth.
+    from diffsynth.core import ModelConfig as DSConfig
+    built: list[Any] = []
+    for entry in configs:
+        extra_kwargs = vram_cfg or {}
+        built.append(
+            DSConfig(
+                model_id=entry.model_id,
+                origin_file_pattern=entry.origin_file_pattern,
+                **extra_kwargs,
+            )
+        )
+    return built

tests/test_models.py ADDED Viewed

	@@ -0,0 +1,38 @@

+import os
+from unittest import mock
+import models
+def test_auto_device_returns_cuda_or_mps_or_cpu():
+    """auto_device() must report one of the three device names the module knows about."""
+    known_devices = ("cuda", "mps", "cpu")
+    assert models.auto_device() in known_devices
+def test_on_spaces_reads_env_var():
+    """on_spaces() is True with SPACES_ZERO_GPU set and False with an emptied environment."""
+    flag_set = {"SPACES_ZERO_GPU": "1"}
+    with mock.patch.dict(os.environ, flag_set, clear=False):
+        detected = models.on_spaces()
+        assert detected is True
+    # clear=True wipes every variable for the duration of the block, including the flag.
+    with mock.patch.dict(os.environ, {}, clear=True):
+        detected = models.on_spaces()
+        assert detected is False
+def test_model_configs_contains_both_transformers():
+    """The registry lists the base and Turbo transformers plus the ControlNet repo."""
+    configs = models.MODEL_CONFIGS
+    repos = {c.model_id for c in configs}
+    assert "Tongyi-MAI/Z-Image" in repos
+    assert "Tongyi-MAI/Z-Image-Turbo" in repos
+    assert "PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1" in repos
+    # The base repo id is also carried by the text-encoder and VAE entries, so a
+    # repo-id match alone does not prove a transformer is registered; check the
+    # file patterns as well.
+    transformer_repos = {
+        c.model_id for c in configs if c.origin_file_pattern.startswith("transformer/")
+    }
+    assert {"Tongyi-MAI/Z-Image", "Tongyi-MAI/Z-Image-Turbo"} <= transformer_repos
+def test_vram_limit_for_cuda_is_reasonable():
+    """For an 80 GB CUDA figure the limit must land in the 60–80 GB range."""
+    device_gb = 80.0
+    limit = models.vram_limit_for("cuda", free_gb=device_gb)
+    # leave headroom
+    assert limit >= 60.0
+    assert limit <= device_gb
+def test_vram_limit_for_mps_is_unified_memory_aware():
+    """For a 24 GB MPS figure the limit must land in the 12–22 GB range."""
+    limit = models.vram_limit_for("mps", free_gb=24.0)
+    # half of unified, headroom
+    lower_bound, upper_bound = 12.0, 22.0
+    assert limit >= lower_bound
+    assert limit <= upper_bound
+def test_vram_limit_for_cpu_is_zero():
+    """CPU gets no offload budget, whatever memory figure is supplied."""
+    limit = models.vram_limit_for("cpu", free_gb=64.0)
+    assert limit == 0.0