Spaces:
Running
Running
preload models into page cache, offload-to-cpu, IQ4_XS text enc, conv-direct, mmap, vae f32, miku theme
Browse files
- Dockerfile +3 -3
- app.py +22 -4
Dockerfile
CHANGED
|
@@ -43,9 +43,9 @@ RUN mkdir -p /app/models
|
|
| 43 |
RUN curl -fL --retry 3 --retry-delay 5 -o /app/models/z-anime-4step-q5_0.gguf \
|
| 44 |
"https://huggingface.co/WeReCooking/Z-Anime-4step-GGUF/resolve/main/z-anime-distill-4step-q5_0.gguf"
|
| 45 |
|
| 46 |
-
# Qwen3-4B text encoder
|
| 47 |
-
RUN curl -fL --retry 3 --retry-delay 5 -o /app/models/
|
| 48 |
-
"https://huggingface.co/worstplayer/Z-Image_Qwen_3_4b_text_encoder_GGUF/resolve/main/Qwen_3_4b-
|
| 49 |
|
| 50 |
# VAE (~168MB)
|
| 51 |
RUN curl -fL --retry 3 --retry-delay 5 -o /app/models/ae.safetensors \
|
|
|
|
| 43 |
RUN curl -fL --retry 3 --retry-delay 5 -o /app/models/z-anime-4step-q5_0.gguf \
|
| 44 |
"https://huggingface.co/WeReCooking/Z-Anime-4step-GGUF/resolve/main/z-anime-distill-4step-q5_0.gguf"
|
| 45 |
|
| 46 |
+
# Qwen3-4B text encoder IQ4_XS GGUF (~2.29GB) - smaller for 18GB RAM
|
| 47 |
+
RUN curl -fL --retry 3 --retry-delay 5 -o /app/models/qwen3_4b_iq4xs.gguf \
|
| 48 |
+
"https://huggingface.co/worstplayer/Z-Image_Qwen_3_4b_text_encoder_GGUF/resolve/main/Qwen_3_4b-IQ4_XS.gguf"
|
| 49 |
|
| 50 |
# VAE (~168MB)
|
| 51 |
RUN curl -fL --retry 3 --retry-delay 5 -o /app/models/ae.safetensors \
|
app.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
"""Z-Anime 6B Image Generation (CPU) via sd-cli binary"""
|
| 2 |
|
| 3 |
-
import os, time, subprocess, tempfile, threading
|
| 4 |
from PIL import Image
|
| 5 |
import gradio as gr
|
| 6 |
|
|
@@ -8,9 +8,22 @@ import gradio as gr
|
|
| 8 |
# Model paths (downloaded at build time)
|
| 9 |
# ---------------------------------------------------------------------------
|
| 10 |
DIFFUSION = "/app/models/z-anime-4step-q5_0.gguf"
|
| 11 |
-
LLM = "/app/models/
|
| 12 |
VAE = "/app/models/ae.safetensors"
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
RESOLUTIONS = ["512x512", "768x512", "512x768"]
|
| 15 |
STEPS = 4
|
| 16 |
CFG = 1.0
|
|
@@ -49,8 +62,13 @@ def generate(prompt, negative_prompt, resolution, seed):
|
|
| 49 |
"--cfg-scale", str(CFG),
|
| 50 |
"--sampling-method", "euler_a",
|
| 51 |
"-o", output_path,
|
|
|
|
| 52 |
"--diffusion-fa",
|
|
|
|
| 53 |
"--vae-tiling",
|
|
|
|
|
|
|
|
|
|
| 54 |
"-v",
|
| 55 |
]
|
| 56 |
if seed >= 0:
|
|
@@ -108,9 +126,9 @@ def generate(prompt, negative_prompt, resolution, seed):
|
|
| 108 |
# ---------------------------------------------------------------------------
|
| 109 |
with gr.Blocks(title="Z-Anime (CPU)") as demo:
|
| 110 |
gr.Markdown(
|
| 111 |
-
"**[Z-Anime 6B](https://huggingface.co/SeeSee21/Z-Anime)** S3-DiT
|
| 112 |
"(distill 4-step) via [sd.cpp](https://github.com/leejet/stable-diffusion.cpp) | "
|
| 113 |
-
"
|
| 114 |
)
|
| 115 |
with gr.Row():
|
| 116 |
with gr.Column():
|
|
|
|
| 1 |
"""Z-Anime 6B Image Generation (CPU) via sd-cli binary"""
|
| 2 |
|
| 3 |
+
import os, time, subprocess, tempfile, threading, mmap
|
| 4 |
from PIL import Image
|
| 5 |
import gradio as gr
|
| 6 |
|
|
|
|
| 8 |
# Model paths (downloaded at build time)
|
| 9 |
# ---------------------------------------------------------------------------
|
| 10 |
DIFFUSION = "/app/models/z-anime-4step-q5_0.gguf"
|
| 11 |
+
LLM = "/app/models/qwen3_4b_iq4xs.gguf"
|
| 12 |
VAE = "/app/models/ae.safetensors"
|
| 13 |
|
| 14 |
+
# Warm up page cache — read all model files so --mmap loads from RAM
|
| 15 |
+
print("[init] Preloading models into page cache...")
|
| 16 |
+
t0 = time.time()
|
| 17 |
+
for model_path in [DIFFUSION, LLM, VAE]:
|
| 18 |
+
if os.path.exists(model_path):
|
| 19 |
+
sz = os.path.getsize(model_path)
|
| 20 |
+
with open(model_path, "rb") as f:
|
| 21 |
+
mm = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
|
| 22 |
+
mm.read()
|
| 23 |
+
mm.close()
|
| 24 |
+
print(f" {os.path.basename(model_path)}: {sz / 1e9:.2f} GB cached")
|
| 25 |
+
print(f"[init] Page cache warm in {time.time() - t0:.1f}s")
|
| 26 |
+
|
| 27 |
RESOLUTIONS = ["512x512", "768x512", "512x768"]
|
| 28 |
STEPS = 4
|
| 29 |
CFG = 1.0
|
|
|
|
| 62 |
"--cfg-scale", str(CFG),
|
| 63 |
"--sampling-method", "euler_a",
|
| 64 |
"-o", output_path,
|
| 65 |
+
"--offload-to-cpu",
|
| 66 |
"--diffusion-fa",
|
| 67 |
+
"--diffusion-conv-direct",
|
| 68 |
"--vae-tiling",
|
| 69 |
+
"--vae-conv-direct",
|
| 70 |
+
"--tensor-type-rules", "^vae=f32",
|
| 71 |
+
"--mmap",
|
| 72 |
"-v",
|
| 73 |
]
|
| 74 |
if seed >= 0:
|
|
|
|
| 126 |
# ---------------------------------------------------------------------------
|
| 127 |
with gr.Blocks(title="Z-Anime (CPU)") as demo:
|
| 128 |
gr.Markdown(
|
| 129 |
+
"**[Z-Anime 6B](https://huggingface.co/SeeSee21/Z-Anime)** S3-DiT Q5_0 GGUF "
|
| 130 |
"(distill 4-step) via [sd.cpp](https://github.com/leejet/stable-diffusion.cpp) | "
|
| 131 |
+
"~25 min at 512x512 on free CPU"
|
| 132 |
)
|
| 133 |
with gr.Row():
|
| 134 |
with gr.Column():
|