Spaces:

WeReCooking
/

Z-Anime-CPU

Running

App Files Files Community

Nekochu committed 7 days ago

Commit

00c018e

1 Parent(s): 0137123

preload models into page cache, offload-to-cpu, IQ4_XS text enc, conv-direct, mmap, vae f32, miku theme

Browse files

Files changed (2) hide show

Dockerfile +3 -3
app.py +22 -4

Dockerfile CHANGED Viewed

@@ -43,9 +43,9 @@ RUN mkdir -p /app/models
 RUN curl -fL --retry 3 --retry-delay 5 -o /app/models/z-anime-4step-q5_0.gguf \
     "https://huggingface.co/WeReCooking/Z-Anime-4step-GGUF/resolve/main/z-anime-distill-4step-q5_0.gguf"
-# Qwen3-4B text encoder Q8_0 GGUF (~4.28GB)
-RUN curl -fL --retry 3 --retry-delay 5 -o /app/models/qwen3_4b_q8_0.gguf \
-    "https://huggingface.co/worstplayer/Z-Image_Qwen_3_4b_text_encoder_GGUF/resolve/main/Qwen_3_4b-Q8_0.gguf"
 # VAE (~168MB)
 RUN curl -fL --retry 3 --retry-delay 5 -o /app/models/ae.safetensors \

 RUN curl -fL --retry 3 --retry-delay 5 -o /app/models/z-anime-4step-q5_0.gguf \
     "https://huggingface.co/WeReCooking/Z-Anime-4step-GGUF/resolve/main/z-anime-distill-4step-q5_0.gguf"
+# Qwen3-4B text encoder IQ4_XS GGUF (~2.29GB) - smaller for 18GB RAM
+RUN curl -fL --retry 3 --retry-delay 5 -o /app/models/qwen3_4b_iq4xs.gguf \
+    "https://huggingface.co/worstplayer/Z-Image_Qwen_3_4b_text_encoder_GGUF/resolve/main/Qwen_3_4b-IQ4_XS.gguf"
 # VAE (~168MB)
 RUN curl -fL --retry 3 --retry-delay 5 -o /app/models/ae.safetensors \

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 """Z-Anime 6B Image Generation (CPU) via sd-cli binary"""
-import os, time, subprocess, tempfile, threading
 from PIL import Image
 import gradio as gr
@@ -8,9 +8,22 @@ import gradio as gr
 # Model paths (downloaded at build time)
 # ---------------------------------------------------------------------------
 DIFFUSION = "/app/models/z-anime-4step-q5_0.gguf"
-LLM = "/app/models/qwen3_4b_q8_0.gguf"
 VAE = "/app/models/ae.safetensors"
 RESOLUTIONS = ["512x512", "768x512", "512x768"]
 STEPS = 4
 CFG = 1.0
@@ -49,8 +62,13 @@ def generate(prompt, negative_prompt, resolution, seed):
         "--cfg-scale", str(CFG),
         "--sampling-method", "euler_a",
         "-o", output_path,
         "--diffusion-fa",
         "--vae-tiling",
         "-v",
     ]
     if seed >= 0:
@@ -108,9 +126,9 @@ def generate(prompt, negative_prompt, resolution, seed):
 # ---------------------------------------------------------------------------
 with gr.Blocks(title="Z-Anime (CPU)") as demo:
     gr.Markdown(
-        "**[Z-Anime 6B](https://huggingface.co/SeeSee21/Z-Anime)** S3-DiT Q5_K_M GGUF "
         "(distill 4-step) via [sd.cpp](https://github.com/leejet/stable-diffusion.cpp) | "
-        "Free CPU inference"
     )
     with gr.Row():
         with gr.Column():

 """Z-Anime 6B Image Generation (CPU) via sd-cli binary"""
+import os, time, subprocess, tempfile, threading, mmap
 from PIL import Image
 import gradio as gr
 # Model paths (downloaded at build time)
 # ---------------------------------------------------------------------------
 DIFFUSION = "/app/models/z-anime-4step-q5_0.gguf"
+LLM = "/app/models/qwen3_4b_iq4xs.gguf"
 VAE = "/app/models/ae.safetensors"
+# Warm up page cache — read all model files so --mmap loads from RAM.
+# Stream each file in fixed-size chunks and discard them: calling read()
+# with no size on a whole-file mmap would copy the entire multi-GB model
+# into a Python bytes object, spiking process memory while the kernel is
+# trying to keep the same data in the page cache.
+print("[init] Preloading models into page cache...")
+t0 = time.time()
+PRELOAD_CHUNK = 16 * 1024 * 1024  # bytes per read; each chunk is dropped immediately
+for model_path in [DIFFUSION, LLM, VAE]:
+    if os.path.exists(model_path):
+        sz = os.path.getsize(model_path)
+        # Unbuffered binary reads: we only want the kernel to touch the pages,
+        # not to keep a second userspace copy. Also works for empty files,
+        # which mmap.mmap(fileno, 0) rejects with ValueError.
+        with open(model_path, "rb", buffering=0) as f:
+            while f.read(PRELOAD_CHUNK):
+                pass
+        print(f"  {os.path.basename(model_path)}: {sz / 1e9:.2f} GB cached")
+print(f"[init] Page cache warm in {time.time() - t0:.1f}s")
 RESOLUTIONS = ["512x512", "768x512", "512x768"]
 STEPS = 4
 CFG = 1.0
         "--cfg-scale", str(CFG),
         "--sampling-method", "euler_a",
         "-o", output_path,
+        "--offload-to-cpu",
         "--diffusion-fa",
+        "--diffusion-conv-direct",
         "--vae-tiling",
+        "--vae-conv-direct",
+        "--tensor-type-rules", "^vae=f32",
+        "--mmap",
         "-v",
     ]
     if seed >= 0:
 # ---------------------------------------------------------------------------
 with gr.Blocks(title="Z-Anime (CPU)") as demo:
     gr.Markdown(
+        "**[Z-Anime 6B](https://huggingface.co/SeeSee21/Z-Anime)** S3-DiT Q5_0 GGUF "
         "(distill 4-step) via [sd.cpp](https://github.com/leejet/stable-diffusion.cpp) | "
+        "~25 min at 512x512 on free CPU"
     )
     with gr.Row():
         with gr.Column():