File size: 4,196 Bytes
5b07271
 
 
 
 
 
 
fb6f526
5b07271
 
 
 
a8b3ea4
 
 
5b07271
 
 
a8b3ea4
 
923e954
a8b3ea4
 
 
 
 
 
 
5b07271
923e954
a8b3ea4
5b07271
 
 
 
 
 
a8b3ea4
 
923e954
 
a8b3ea4
 
 
 
 
 
 
 
 
923e954
a8b3ea4
 
 
 
 
 
 
 
5b07271
 
 
a8b3ea4
5b07271
01f371b
5b07271
 
 
 
 
 
 
 
 
 
 
 
923e954
a8b3ea4
 
 
 
 
923e954
 
a8b3ea4
 
 
5b07271
 
 
 
a8b3ea4
 
5b07271
 
a8b3ea4
 
 
 
 
 
 
 
 
 
 
 
 
 
5b07271
 
 
 
a8b3ea4
fb6f526
5b07271
 
a8b3ea4
 
 
 
5b07271
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
{
  "model_type": "z-anime",
  "architecture": "S3-DiT",
  "parameters": "6B",
  "license": "apache-2.0",
  "base_model": "Tongyi-MAI/Z-Image",
  "base_model_relation": "finetune",
  "author": "mcuo",
  "pipeline_tag": "text-to-image",
  "prompting": {
    "style": "natural-language",
    "negative_prompt_support": {
      "base": "full",
      "distill_8step": "limited",
      "distill_4step": "limited"
    }
  },
  "variants": {
    "base": {
      "bf16": "diffusion_models/z-anime-base-bf16.safetensors",
      "fp8": "diffusion_models/z-anime-base-fp8.safetensors",
      "recommended_settings": {
        "steps": "28-50",
        "cfg": "3.0-5.0",
        "sampler": "euler_ancestral",
        "scheduler": "beta"
      }
    },
    "distill_8step": {
      "bf16": "diffusion_models/z-anime-distill-8step-bf16.safetensors",
      "fp8": "diffusion_models/z-anime-distill-8step-fp8.safetensors",
      "recommended_settings": {
        "steps": 8,
        "cfg": 1.0,
        "sampler": "euler_ancestral",
        "scheduler": "beta"
      }
    },
    "distill_4step": {
      "bf16": "diffusion_models/z-anime-distill-4step-bf16.safetensors",
      "fp8": "diffusion_models/z-anime-distill-4step-fp8.safetensors",
      "recommended_settings": {
        "steps": 4,
        "cfg": 1.0,
        "sampler": "euler_ancestral",
        "scheduler": "beta"
      }
    },
    "gguf": {
      "q8_0": {
        "file": "gguf/z-anime-base-q8_0.gguf",
        "description": "Q8_0 quantization",
        "size": "~6.73 GB"
      },
      "q4_k_s": {
        "file": "gguf/z-anime-base-q4_k_s.gguf",
        "description": "Q4_K_S quantization",
        "size": "~4.2 GB"
      }
    }
  },
  "diffusers_folder": {
    "path": "diffusers/",
    "pipeline_class": "ZImagePipeline",
    "usage": "ZImagePipeline.from_pretrained('mcuo/Anime-Z', subfolder='diffusers', torch_dtype=torch.bfloat16)",
    "components": [
      "model_index.json",
      "scheduler/",
      "tokenizer/",
      "text_encoder/",
      "transformer/",
      "vae/"
    ]
  },
  "components": {
    "text_encoders": {
      "default": {
        "bf16": "text_encoder/qwen_3_4b-bf16.safetensors",
        "fp8": "text_encoder/qwen_3_4b-fp8.safetensors",
        "description": "Standard Z-Image text encoder, repackaged as a single safetensors file",
        "comfyui_path": "ComfyUI/models/clip/"
      },
      "engineer_v4": {
        "bf16": "text_encoder/qwen_3_4b-engineer-v4-bf16.safetensors",
        "fp8": "text_encoder/qwen_3_4b-engineer-v4-fp8.safetensors",
        "description": "Alternative full fine-tune by BennyDaBall (SMART training, more varied outputs)",
        "source": "https://huggingface.co/BennyDaBall/Qwen3-4b-Z-Image-Engineer-V4",
        "comfyui_path": "ComfyUI/models/clip/"
      }
    },
    "vae": {
      "file": "vae/ae.safetensors",
      "description": "Z-Image VAE (slightly trained alongside Z-Anime)",
      "comfyui_path": "ComfyUI/models/vae/"
    }
  },
  "comfyui_paths": {
    "diffusion_models": "ComfyUI/models/diffusion_models/",
    "unet": "ComfyUI/models/unet/",
    "clip": "ComfyUI/models/clip/",
    "vae": "ComfyUI/models/vae/",
    "checkpoints": "ComfyUI/models/checkpoints/"
  },
  "requirements": {
    "custom_nodes": [
      "rgthree-comfy",
      "ComfyUI-Lora-Manager",
      "ComfyUI-SeedVR2_VideoUpscaler (optional)"
    ]
  },
  "supported_vram": "8GB+",
  "links": {
    "civitai": "https://civitai.red/models/2483351",
    "base_model": "https://huggingface.co/Tongyi-MAI/Z-Image",
    "engineer_v4": "https://huggingface.co/BennyDaBall/Qwen3-4b-Z-Image-Engineer-V4",
    "author": "https://huggingface.co/mcuo"
  },
  "notes": [
    "BF16 and FP8 are the main release formats.",
    "GGUF variants are intended for lower-memory or alternative inference setups.",
    "Two text encoders are included: the standard Z-Image one (default) and BennyDaBall's Engineer V4 (alternative).",
    "The diffusers/ subfolder is a full diffusers-format checkpoint loadable via ZImagePipeline.from_pretrained(repo, subfolder='diffusers')."
  ]
}