Nekochu committed on
Commit
00c018e
·
1 Parent(s): 0137123

preload models into page cache, offload-to-cpu, IQ4_XS text enc, conv-direct, mmap, vae f32, miku theme

Browse files
Files changed (2) hide show
  1. Dockerfile +3 -3
  2. app.py +22 -4
Dockerfile CHANGED
@@ -43,9 +43,9 @@ RUN mkdir -p /app/models
43
  RUN curl -fL --retry 3 --retry-delay 5 -o /app/models/z-anime-4step-q5_0.gguf \
44
  "https://huggingface.co/WeReCooking/Z-Anime-4step-GGUF/resolve/main/z-anime-distill-4step-q5_0.gguf"
45
 
46
- # Qwen3-4B text encoder Q8_0 GGUF (~4.28GB)
47
- RUN curl -fL --retry 3 --retry-delay 5 -o /app/models/qwen3_4b_q8_0.gguf \
48
- "https://huggingface.co/worstplayer/Z-Image_Qwen_3_4b_text_encoder_GGUF/resolve/main/Qwen_3_4b-Q8_0.gguf"
49
 
50
  # VAE (~168MB)
51
  RUN curl -fL --retry 3 --retry-delay 5 -o /app/models/ae.safetensors \
 
43
  RUN curl -fL --retry 3 --retry-delay 5 -o /app/models/z-anime-4step-q5_0.gguf \
44
  "https://huggingface.co/WeReCooking/Z-Anime-4step-GGUF/resolve/main/z-anime-distill-4step-q5_0.gguf"
45
 
46
+ # Qwen3-4B text encoder IQ4_XS GGUF (~2.29GB) - smaller for 18GB RAM
47
+ RUN curl -fL --retry 3 --retry-delay 5 -o /app/models/qwen3_4b_iq4xs.gguf \
48
+ "https://huggingface.co/worstplayer/Z-Image_Qwen_3_4b_text_encoder_GGUF/resolve/main/Qwen_3_4b-IQ4_XS.gguf"
49
 
50
  # VAE (~168MB)
51
  RUN curl -fL --retry 3 --retry-delay 5 -o /app/models/ae.safetensors \
app.py CHANGED
@@ -1,6 +1,6 @@
1
  """Z-Anime 6B Image Generation (CPU) via sd-cli binary"""
2
 
3
- import os, time, subprocess, tempfile, threading
4
  from PIL import Image
5
  import gradio as gr
6
 
@@ -8,9 +8,22 @@ import gradio as gr
8
  # Model paths (downloaded at build time)
9
  # ---------------------------------------------------------------------------
10
  DIFFUSION = "/app/models/z-anime-4step-q5_0.gguf"
11
- LLM = "/app/models/qwen3_4b_q8_0.gguf"
12
  VAE = "/app/models/ae.safetensors"
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  RESOLUTIONS = ["512x512", "768x512", "512x768"]
15
  STEPS = 4
16
  CFG = 1.0
@@ -49,8 +62,13 @@ def generate(prompt, negative_prompt, resolution, seed):
49
  "--cfg-scale", str(CFG),
50
  "--sampling-method", "euler_a",
51
  "-o", output_path,
 
52
  "--diffusion-fa",
 
53
  "--vae-tiling",
 
 
 
54
  "-v",
55
  ]
56
  if seed >= 0:
@@ -108,9 +126,9 @@ def generate(prompt, negative_prompt, resolution, seed):
108
  # ---------------------------------------------------------------------------
109
  with gr.Blocks(title="Z-Anime (CPU)") as demo:
110
  gr.Markdown(
111
- "**[Z-Anime 6B](https://huggingface.co/SeeSee21/Z-Anime)** S3-DiT Q5_K_M GGUF "
112
  "(distill 4-step) via [sd.cpp](https://github.com/leejet/stable-diffusion.cpp) | "
113
- "Free CPU inference"
114
  )
115
  with gr.Row():
116
  with gr.Column():
 
1
  """Z-Anime 6B Image Generation (CPU) via sd-cli binary"""
2
 
3
+ import os, time, subprocess, tempfile, threading, mmap
4
  from PIL import Image
5
  import gradio as gr
6
 
 
8
  # Model paths (downloaded at build time)
9
  # ---------------------------------------------------------------------------
10
  DIFFUSION = "/app/models/z-anime-4step-q5_0.gguf"
11
+ LLM = "/app/models/qwen3_4b_iq4xs.gguf"
12
  VAE = "/app/models/ae.safetensors"
13
 
14
+ # Warm up page cache — read all model files so --mmap loads from RAM
15
+ print("[init] Preloading models into page cache...")
16
+ t0 = time.time()
17
+ for model_path in [DIFFUSION, LLM, VAE]:
18
+ if os.path.exists(model_path):
19
+ sz = os.path.getsize(model_path)
20
+ with open(model_path, "rb") as f:
21
+ mm = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
22
+ mm.read()
23
+ mm.close()
24
+ print(f" {os.path.basename(model_path)}: {sz / 1e9:.2f} GB cached")
25
+ print(f"[init] Page cache warm in {time.time() - t0:.1f}s")
26
+
27
  RESOLUTIONS = ["512x512", "768x512", "512x768"]
28
  STEPS = 4
29
  CFG = 1.0
 
62
  "--cfg-scale", str(CFG),
63
  "--sampling-method", "euler_a",
64
  "-o", output_path,
65
+ "--offload-to-cpu",
66
  "--diffusion-fa",
67
+ "--diffusion-conv-direct",
68
  "--vae-tiling",
69
+ "--vae-conv-direct",
70
+ "--tensor-type-rules", "^vae=f32",
71
+ "--mmap",
72
  "-v",
73
  ]
74
  if seed >= 0:
 
126
  # ---------------------------------------------------------------------------
127
  with gr.Blocks(title="Z-Anime (CPU)") as demo:
128
  gr.Markdown(
129
+ "**[Z-Anime 6B](https://huggingface.co/SeeSee21/Z-Anime)** S3-DiT Q5_0 GGUF "
130
  "(distill 4-step) via [sd.cpp](https://github.com/leejet/stable-diffusion.cpp) | "
131
+ "~25 min at 512x512 on free CPU"
132
  )
133
  with gr.Row():
134
  with gr.Column():