Text-to-Image
Diffusers
Safetensors
English
Russian
LensPipeline
LensPipeline
sdnq
quantized
uint4
static-quantization
ablation
Instructions to use WaveCut/Lens-Turbo-SDNQ-uint4-static with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Diffusers
How to use WaveCut/Lens-Turbo-SDNQ-uint4-static with Diffusers:
pip install -U diffusers transformers accelerate
import torch
from diffusers import DiffusionPipeline

# switch to "mps" for apple devices
pipe = DiffusionPipeline.from_pretrained("WaveCut/Lens-Turbo-SDNQ-uint4-static", dtype=torch.bfloat16, device_map="cuda")

prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
image = pipe(prompt).images[0]
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- Draw Things
- DiffusionBee
Upload corrected SDNQ uint4 static Lens-Turbo
Browse files
- .gitattributes +11 -0
- README.md +52 -72
- assets/comparison/comparison_grid.jpg +3 -0
- assets/comparison/p01_sdnq_uint4_static_fixed.png +3 -0
- assets/comparison/p02_sdnq_uint4_static_fixed.png +3 -0
- assets/comparison/p03_sdnq_uint4_static_fixed.png +3 -0
- assets/comparison/p04_sdnq_uint4_static_fixed.png +3 -0
- assets/comparison/p05_sdnq_uint4_static_fixed.png +3 -0
- assets/comparison/p06_sdnq_uint4_static_fixed.png +3 -0
- assets/comparison/p07_sdnq_uint4_static_fixed.png +3 -0
- assets/comparison/p08_sdnq_uint4_static_fixed.png +3 -0
- assets/comparison/p09_sdnq_uint4_static_fixed.png +3 -0
- assets/comparison/p10_sdnq_uint4_static_fixed.png +3 -0
- benchmark_metrics.json +7 -7
- comparison_matrix.json +80 -80
- sdnq_quantization_summary.json +17 -13
- transformer/config.json +21 -19
- transformer/diffusion_pytorch_model-00001-of-00002.safetensors +3 -0
- transformer/diffusion_pytorch_model-00002-of-00002.safetensors +3 -0
- transformer/diffusion_pytorch_model.safetensors.index.json +0 -0
- transformer/quantization_config.json +21 -19
.gitattributes
CHANGED
|
@@ -54,3 +54,14 @@ assets/comparison/p09_sdnq_uint_static.png filter=lfs diff=lfs merge=lfs -text
|
|
| 54 |
assets/comparison/p10_original.png filter=lfs diff=lfs merge=lfs -text
|
| 55 |
assets/comparison/p10_sdnq_uint_static.png filter=lfs diff=lfs merge=lfs -text
|
| 56 |
tokenizer/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
assets/comparison/p10_original.png filter=lfs diff=lfs merge=lfs -text
|
| 55 |
assets/comparison/p10_sdnq_uint_static.png filter=lfs diff=lfs merge=lfs -text
|
| 56 |
tokenizer/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
assets/comparison/comparison_grid.jpg filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
assets/comparison/p01_sdnq_uint4_static_fixed.png filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
assets/comparison/p02_sdnq_uint4_static_fixed.png filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
assets/comparison/p03_sdnq_uint4_static_fixed.png filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
assets/comparison/p04_sdnq_uint4_static_fixed.png filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
assets/comparison/p05_sdnq_uint4_static_fixed.png filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
assets/comparison/p06_sdnq_uint4_static_fixed.png filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
assets/comparison/p07_sdnq_uint4_static_fixed.png filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
assets/comparison/p08_sdnq_uint4_static_fixed.png filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
assets/comparison/p09_sdnq_uint4_static_fixed.png filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
assets/comparison/p10_sdnq_uint4_static_fixed.png filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
|
@@ -13,50 +13,59 @@ tags:
|
|
| 13 |
- quantized
|
| 14 |
- uint4
|
| 15 |
- static-quantization
|
|
|
|
| 16 |
base_model: microsoft/Lens-Turbo
|
| 17 |
---
|
| 18 |
|
| 19 |
-
# Lens-Turbo SDNQ
|
| 20 |
|
| 21 |
-
This is
|
| 22 |
-
It keeps the Lens pipeline structure intact and focuses quantization on the denoising transformer, which is the main generation component.
|
| 23 |
|
| 24 |
-
The
|
| 25 |
|
| 26 |
-
##
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
| Field | Value |
|
| 29 |
| --- | --- |
|
| 30 |
-
| Method | SDNQ
|
| 31 |
| Quantized component | `transformer` / `LensTransformer2DModel` |
|
|
|
|
|
|
|
| 32 |
| Weight dtype | `uint4` |
|
| 33 |
| Quantized matmul | enabled |
|
| 34 |
| Quantized matmul dtype | `int8` |
|
| 35 |
-
| Static quantization | enabled |
|
| 36 |
| Dynamic quantization | disabled |
|
| 37 |
| SVDQuant | disabled |
|
| 38 |
| Hadamard rotation | disabled |
|
| 39 |
-
| Convolution quantization | disabled |
|
| 40 |
-
| Embedding quantization | disabled |
|
| 41 |
| Text encoder | unchanged from source checkpoint |
|
| 42 |
| VAE | unchanged from source checkpoint |
|
| 43 |
| Compute dtype | `torch.bfloat16` |
|
| 44 |
-
|
| 45 |
-
Raw config:
|
| 46 |
|
| 47 |
```json
|
| 48 |
{
|
| 49 |
"weights_dtype": "uint4",
|
| 50 |
"quantized_matmul_dtype": "int8",
|
| 51 |
-
"group_size":
|
| 52 |
-
"use_static_quantization":
|
| 53 |
-
"use_dynamic_quantization":
|
| 54 |
-
"use_quantized_matmul":
|
| 55 |
-
"use_svd":
|
| 56 |
-
"use_hadamard":
|
| 57 |
-
"quant_conv":
|
| 58 |
-
"quant_embedding":
|
| 59 |
-
"dequantize_fp32":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
"quantization_device": "cuda",
|
| 61 |
"return_device": "cuda"
|
| 62 |
}
|
|
@@ -64,17 +73,13 @@ Raw config:
|
|
| 64 |
|
| 65 |
## Usage
|
| 66 |
|
| 67 |
-
Install the Lens inference code and SDNQ, then download the repo snapshot and load the quantized transformer explicitly:
|
| 68 |
-
|
| 69 |
```python
|
| 70 |
import torch
|
| 71 |
from huggingface_hub import snapshot_download
|
| 72 |
from lens import LensPipeline, LensTransformer2DModel
|
| 73 |
from sdnq import load_sdnq_model
|
| 74 |
|
| 75 |
-
|
| 76 |
-
model_dir = snapshot_download(repo_id)
|
| 77 |
-
|
| 78 |
transformer = load_sdnq_model(
|
| 79 |
model_dir + "/transformer",
|
| 80 |
model_cls=LensTransformer2DModel,
|
|
@@ -83,51 +88,38 @@ transformer = load_sdnq_model(
|
|
| 83 |
dequantize_fp32=False,
|
| 84 |
use_quantized_matmul=True,
|
| 85 |
)
|
| 86 |
-
|
| 87 |
pipe = LensPipeline.from_pretrained(
|
| 88 |
model_dir,
|
| 89 |
transformer=transformer,
|
| 90 |
torch_dtype=torch.bfloat16,
|
| 91 |
).to("cuda")
|
| 92 |
-
|
| 93 |
-
image = pipe(
|
| 94 |
-
"A cat holding a sign that says hello world",
|
| 95 |
-
base_resolution=1024,
|
| 96 |
-
aspect_ratio="1:1",
|
| 97 |
-
num_inference_steps=4,
|
| 98 |
-
guidance_scale=1.0,
|
| 99 |
-
generator=torch.Generator("cuda").manual_seed(0),
|
| 100 |
-
).images[0]
|
| 101 |
```
|
| 102 |
|
| 103 |
## Benchmark
|
| 104 |
|
| 105 |
-
|
| 106 |
-
Each prompt used `base_resolution=1024`, `aspect_ratio="1:1"`, `num_inference_steps=4`, `guidance_scale=1.0`, `torch.bfloat16`, and a fixed CUDA seed.
|
| 107 |
|
| 108 |
-
| Metric | Original Lens-Turbo | SDNQ
|
| 109 |
| --- | ---: | ---: |
|
| 110 |
-
| Load time, seconds |
|
| 111 |
-
| Load peak allocated VRAM, GB | 20.807 |
|
| 112 |
-
| Load peak reserved VRAM, GB | 20.928 |
|
| 113 |
-
|
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
|
| 120 |
-
|
|
| 121 |
-
|
|
| 122 |
-
|
|
| 123 |
-
|
|
| 124 |
-
|
|
| 125 |
-
|
|
| 126 |
-
|
|
| 127 |
-
|
|
| 128 |
-
|
|
| 129 |
-
| P09 | Renaissance Lab Notebook | 109 | 1.180 | 2.855 | +141.9% | 25.438 | 19.719 |  |  |
|
| 130 |
-
| P10 | Russian Provincial Print Shop | 110 | 3.749 | 2.888 | -23.0% | 25.459 | 19.766 |  |  |
|
| 131 |
|
| 132 |
## Full Prompts
|
| 133 |
|
|
@@ -184,16 +176,4 @@ An alternate-history Renaissance laboratory where an astronomer-painter is combi
|
|
| 184 |
|
| 185 |
## Notes
|
| 186 |
|
| 187 |
-
This checkpoint is intended for research and evaluation. It inherits the upstream Lens limitations and responsible AI considerations from the source model.
|
| 188 |
-
The comparison images are deterministic for the listed seeds under this environment but may vary across driver, PyTorch, SDNQ, or kernel versions.
|
| 189 |
-
|
| 190 |
-
## Citation
|
| 191 |
-
|
| 192 |
-
```bibtex
|
| 193 |
-
@article{zhao2026lens,
|
| 194 |
-
title = {Lens: Rethinking Training Efficiency for Foundational Text-to-Image Models},
|
| 195 |
-
author = {Guo, Baining and Luo, Chong and Chen, Dong and Chen, Dongdong and Wei, Fangyun and Li, Ji and Bao, Jianmin and Zhang, Jiawei and Zhao, Jinjing and Shi, Lei and Yang, Qinhong and Zhang, Sirui and Wu, Xiuyu and Feng, Xuelu and Lu, Yan and Dong, Yanchen and Yue, Yang and Wang, Yitong and Chen, Yunuo and Liang, Zhiyang and Wan, Ziyu},
|
| 196 |
-
journal = {arXiv preprint arXiv:2605.21573},
|
| 197 |
-
year = {2026}
|
| 198 |
-
}
|
| 199 |
-
```
|
|
|
|
| 13 |
- quantized
|
| 14 |
- uint4
|
| 15 |
- static-quantization
|
| 16 |
+
- ablation
|
| 17 |
base_model: microsoft/Lens-Turbo
|
| 18 |
---
|
| 19 |
|
| 20 |
+
# Lens-Turbo SDNQ uint4 static
|
| 21 |
|
| 22 |
+
This is a corrected SDNQ static UINT4 quantized variant of [microsoft/Lens-Turbo](https://huggingface.co/microsoft/Lens-Turbo).
|
|
|
|
| 23 |
|
| 24 |
+
The first all-linear UINT4 attempt produced periodic grid artifacts and badly degraded text. An ablation found the culprit: quantizing the transformer block modulation linears (`img_mod` and `txt_mod`) damages Lens-Turbo disproportionately. This revision keeps those modulation layers in bfloat16 and quantizes the rest of the denoising transformer with SDNQ UINT4.
|
| 25 |
|
| 26 |
+
## Visual Comparison
|
| 27 |
+
|
| 28 |
+
[Raw comparison grid](https://huggingface.co/WaveCut/Lens-Turbo-SDNQ-uint4-static/resolve/main/assets/comparison/comparison_grid.jpg)
|
| 29 |
+
|
| 30 |
+

|
| 31 |
+
|
| 32 |
+
## Quantization Recipe
|
| 33 |
|
| 34 |
| Field | Value |
|
| 35 |
| --- | --- |
|
| 36 |
+
| Method | SDNQ uint4 static |
|
| 37 |
| Quantized component | `transformer` / `LensTransformer2DModel` |
|
| 38 |
+
| Excluded transformer layers | `*.img_mod.*`, `*.txt_mod.*` |
|
| 39 |
+
| Reason for exclusion | UINT4 quantization of modulation linears caused periodic grid artifacts and severe text degradation |
|
| 40 |
| Weight dtype | `uint4` |
|
| 41 |
| Quantized matmul | enabled |
|
| 42 |
| Quantized matmul dtype | `int8` |
|
|
|
|
| 43 |
| Dynamic quantization | disabled |
|
| 44 |
| SVDQuant | disabled |
|
| 45 |
| Hadamard rotation | disabled |
|
|
|
|
|
|
|
| 46 |
| Text encoder | unchanged from source checkpoint |
|
| 47 |
| VAE | unchanged from source checkpoint |
|
| 48 |
| Compute dtype | `torch.bfloat16` |
|
| 49 |
+
| Quantization time | 0.178 s |
|
|
|
|
| 50 |
|
| 51 |
```json
|
| 52 |
{
|
| 53 |
"weights_dtype": "uint4",
|
| 54 |
"quantized_matmul_dtype": "int8",
|
| 55 |
+
"group_size": 0,
|
| 56 |
+
"use_static_quantization": true,
|
| 57 |
+
"use_dynamic_quantization": false,
|
| 58 |
+
"use_quantized_matmul": true,
|
| 59 |
+
"use_svd": false,
|
| 60 |
+
"use_hadamard": false,
|
| 61 |
+
"quant_conv": false,
|
| 62 |
+
"quant_embedding": false,
|
| 63 |
+
"dequantize_fp32": false,
|
| 64 |
+
"modules_to_not_convert": [
|
| 65 |
+
"*.img_mod.*",
|
| 66 |
+
"*.txt_mod.*"
|
| 67 |
+
],
|
| 68 |
+
"modules_to_not_use_matmul": [],
|
| 69 |
"quantization_device": "cuda",
|
| 70 |
"return_device": "cuda"
|
| 71 |
}
|
|
|
|
| 73 |
|
| 74 |
## Usage
|
| 75 |
|
|
|
|
|
|
|
| 76 |
```python
|
| 77 |
import torch
|
| 78 |
from huggingface_hub import snapshot_download
|
| 79 |
from lens import LensPipeline, LensTransformer2DModel
|
| 80 |
from sdnq import load_sdnq_model
|
| 81 |
|
| 82 |
+
model_dir = snapshot_download("WaveCut/Lens-Turbo-SDNQ-uint4-static")
|
|
|
|
|
|
|
| 83 |
transformer = load_sdnq_model(
|
| 84 |
model_dir + "/transformer",
|
| 85 |
model_cls=LensTransformer2DModel,
|
|
|
|
| 88 |
dequantize_fp32=False,
|
| 89 |
use_quantized_matmul=True,
|
| 90 |
)
|
|
|
|
| 91 |
pipe = LensPipeline.from_pretrained(
|
| 92 |
model_dir,
|
| 93 |
transformer=transformer,
|
| 94 |
torch_dtype=torch.bfloat16,
|
| 95 |
).to("cuda")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
```
|
| 97 |
|
| 98 |
## Benchmark
|
| 99 |
|
| 100 |
+
Hardware: RunPod NVIDIA H100 80GB HBM3, PyTorch 2.8.0 CUDA 12.8 container, local container disk only. Benchmark date: 2026-05-24.
|
|
|
|
| 101 |
|
| 102 |
+
| Metric | Original Lens-Turbo | SDNQ uint4 static fixed |
|
| 103 |
| --- | ---: | ---: |
|
| 104 |
+
| Load time, seconds | 19.272 | 13.461 |
|
| 105 |
+
| Load peak allocated VRAM, GB | 20.807 | 17.179 |
|
| 106 |
+
| Load peak reserved VRAM, GB | 20.928 | 17.244 |
|
| 107 |
+
| Average prompt runtime, seconds | 1.728 | 3.663 |
|
| 108 |
+
|
| 109 |
+
## 10-Prompt Matrix
|
| 110 |
+
|
| 111 |
+
| ID | Scenario | Seed | Original time, s | Quant time, s | Time delta (quant vs. original) | Original peak allocated VRAM, GB | Quant peak allocated VRAM, GB |
|
| 112 |
+
| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: |
|
| 113 |
+
| P01 | Orbital Night Market | 101 | 1.579 | 2.268 | +43.6% | 23.245 | 19.585 |
|
| 114 |
+
| P02 | Arctic Research Desk | 102 | 1.370 | 4.307 | +214.4% | 23.245 | 19.585 |
|
| 115 |
+
| P03 | Victorian Automaton Repair | 103 | 3.190 | 4.111 | +28.9% | 23.244 | 19.585 |
|
| 116 |
+
| P04 | Mars Greenhouse Control Room | 104 | 1.191 | 4.094 | +243.7% | 23.242 | 19.582 |
|
| 117 |
+
| P05 | Lost Railway Poster Wall | 105 | 1.195 | 3.672 | +207.3% | 23.242 | 19.582 |
|
| 118 |
+
| P06 | Miniature Courtroom Diorama | 106 | 1.188 | 3.577 | +201.1% | 23.244 | 19.584 |
|
| 119 |
+
| P07 | Rainy Seoul Book Cafe | 107 | 1.190 | 3.597 | +202.3% | 23.244 | 19.585 |
|
| 120 |
+
| P08 | Oceanographic Expedition Map | 108 | 1.184 | 3.695 | +212.1% | 23.244 | 19.584 |
|
| 121 |
+
| P09 | Renaissance Lab Notebook | 109 | 1.197 | 3.648 | +204.8% | 23.242 | 19.582 |
|
| 122 |
+
| P10 | Russian Provincial Print Shop | 110 | 3.993 | 3.664 | -8.2% | 23.252 | 19.593 |
|
|
|
|
|
|
|
| 123 |
|
| 124 |
## Full Prompts
|
| 125 |
|
|
|
|
| 176 |
|
| 177 |
## Notes
|
| 178 |
|
| 179 |
+
This checkpoint is intended for research and evaluation. It inherits the upstream Lens limitations and responsible AI considerations from the source model. Text rendering remains challenging, but the corrected recipe removes the obvious grid/printed texture failure seen in the all-linear UINT4 attempt.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
assets/comparison/comparison_grid.jpg
ADDED
|
Git LFS Details
|
assets/comparison/p01_sdnq_uint4_static_fixed.png
ADDED
|
Git LFS Details
|
assets/comparison/p02_sdnq_uint4_static_fixed.png
ADDED
|
Git LFS Details
|
assets/comparison/p03_sdnq_uint4_static_fixed.png
ADDED
|
Git LFS Details
|
assets/comparison/p04_sdnq_uint4_static_fixed.png
ADDED
|
Git LFS Details
|
assets/comparison/p05_sdnq_uint4_static_fixed.png
ADDED
|
Git LFS Details
|
assets/comparison/p06_sdnq_uint4_static_fixed.png
ADDED
|
Git LFS Details
|
assets/comparison/p07_sdnq_uint4_static_fixed.png
ADDED
|
Git LFS Details
|
assets/comparison/p08_sdnq_uint4_static_fixed.png
ADDED
|
Git LFS Details
|
assets/comparison/p09_sdnq_uint4_static_fixed.png
ADDED
|
Git LFS Details
|
assets/comparison/p10_sdnq_uint4_static_fixed.png
ADDED
|
Git LFS Details
|
benchmark_metrics.json
CHANGED
|
@@ -1,19 +1,19 @@
|
|
| 1 |
{
|
| 2 |
"load": {
|
| 3 |
"base": {
|
| 4 |
-
"load_time_s":
|
| 5 |
"peak_allocated_gb": 20.807,
|
| 6 |
"peak_reserved_gb": 20.928,
|
| 7 |
"end_allocated_gb": 20.807,
|
| 8 |
"end_reserved_gb": 20.928
|
| 9 |
},
|
| 10 |
"quant": {
|
| 11 |
-
"load_time_s":
|
| 12 |
-
"peak_allocated_gb":
|
| 13 |
-
"peak_reserved_gb":
|
| 14 |
-
"end_allocated_gb":
|
| 15 |
-
"end_reserved_gb":
|
| 16 |
}
|
| 17 |
},
|
| 18 |
-
"quantization_time_s":
|
| 19 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"load": {
|
| 3 |
"base": {
|
| 4 |
+
"load_time_s": 19.272,
|
| 5 |
"peak_allocated_gb": 20.807,
|
| 6 |
"peak_reserved_gb": 20.928,
|
| 7 |
"end_allocated_gb": 20.807,
|
| 8 |
"end_reserved_gb": 20.928
|
| 9 |
},
|
| 10 |
"quant": {
|
| 11 |
+
"load_time_s": 13.461,
|
| 12 |
+
"peak_allocated_gb": 17.179,
|
| 13 |
+
"peak_reserved_gb": 17.244,
|
| 14 |
+
"end_allocated_gb": 17.179,
|
| 15 |
+
"end_reserved_gb": 17.244
|
| 16 |
}
|
| 17 |
},
|
| 18 |
+
"quantization_time_s": 0.178
|
| 19 |
}
|
comparison_matrix.json
CHANGED
|
@@ -10,7 +10,7 @@
|
|
| 10 |
"seed": 101,
|
| 11 |
"prompt": "A dense cinematic night market inside a transparent orbital habitat, with Earth curving below the glass floor, vendors selling glowing algae noodles and tiny repair drones, rain droplets floating in zero gravity, reflections on wet metal, and at least six readable signs in different places: a vertical neon sign saying \"ORBITAL TEA HOUSE\", a handwritten chalk menu saying \"NO GRAVITY REFUNDS\", a yellow safety placard saying \"MAG BOOTS REQUIRED\", a small receipt printer label saying \"BAY 12 PICKUP\", a red banner saying \"FRESH SYNTH-MANGO\", and a blue customs notice saying \"DECLARE ALL MOON ROCKS\". Ultra detailed, wide angle, layered crowd, realistic lens flare, crisp small typography.",
|
| 12 |
"image": "assets/comparison/p01_original.png",
|
| 13 |
-
"time_s":
|
| 14 |
"peak_allocated_gb": 23.245,
|
| 15 |
"peak_reserved_gb": 25.438,
|
| 16 |
"end_allocated_gb": 20.841,
|
|
@@ -21,14 +21,14 @@
|
|
| 21 |
"title": "Orbital Night Market",
|
| 22 |
"seed": 101,
|
| 23 |
"prompt": "A dense cinematic night market inside a transparent orbital habitat, with Earth curving below the glass floor, vendors selling glowing algae noodles and tiny repair drones, rain droplets floating in zero gravity, reflections on wet metal, and at least six readable signs in different places: a vertical neon sign saying \"ORBITAL TEA HOUSE\", a handwritten chalk menu saying \"NO GRAVITY REFUNDS\", a yellow safety placard saying \"MAG BOOTS REQUIRED\", a small receipt printer label saying \"BAY 12 PICKUP\", a red banner saying \"FRESH SYNTH-MANGO\", and a blue customs notice saying \"DECLARE ALL MOON ROCKS\". Ultra detailed, wide angle, layered crowd, realistic lens flare, crisp small typography.",
|
| 24 |
-
"image": "assets/comparison/
|
| 25 |
-
"time_s":
|
| 26 |
-
"peak_allocated_gb":
|
| 27 |
-
"peak_reserved_gb":
|
| 28 |
-
"end_allocated_gb":
|
| 29 |
-
"end_reserved_gb":
|
| 30 |
},
|
| 31 |
-
"speed_delta_pct":
|
| 32 |
},
|
| 33 |
{
|
| 34 |
"id": "P02",
|
|
@@ -41,7 +41,7 @@
|
|
| 41 |
"seed": 102,
|
| 42 |
"prompt": "A top-down documentary photo of an Arctic climate research desk inside a weather station during a blizzard, with ice crystals on the window, a rugged laptop displaying a complex map, three paper field notebooks, sample vials, a steaming enamel mug, and long English text on multiple objects: the notebook cover reads \"FIELD LOG: STATION NORD, WEEK 17\", a whiteboard in the background reads \"CORE DEPTH 42.8m / TEMP -31C / WIND 62 km/h\", a red tag on a sample tube reads \"DO NOT THAW\", and a printed memo reads \"CALIBRATE SENSORS BEFORE SUNRISE\". Natural cold light, precise shadows, photorealistic texture, no blurry text.",
|
| 43 |
"image": "assets/comparison/p02_original.png",
|
| 44 |
-
"time_s": 1.
|
| 45 |
"peak_allocated_gb": 23.245,
|
| 46 |
"peak_reserved_gb": 25.434,
|
| 47 |
"end_allocated_gb": 20.841,
|
|
@@ -52,14 +52,14 @@
|
|
| 52 |
"title": "Arctic Research Desk",
|
| 53 |
"seed": 102,
|
| 54 |
"prompt": "A top-down documentary photo of an Arctic climate research desk inside a weather station during a blizzard, with ice crystals on the window, a rugged laptop displaying a complex map, three paper field notebooks, sample vials, a steaming enamel mug, and long English text on multiple objects: the notebook cover reads \"FIELD LOG: STATION NORD, WEEK 17\", a whiteboard in the background reads \"CORE DEPTH 42.8m / TEMP -31C / WIND 62 km/h\", a red tag on a sample tube reads \"DO NOT THAW\", and a printed memo reads \"CALIBRATE SENSORS BEFORE SUNRISE\". Natural cold light, precise shadows, photorealistic texture, no blurry text.",
|
| 55 |
-
"image": "assets/comparison/
|
| 56 |
-
"time_s":
|
| 57 |
-
"peak_allocated_gb":
|
| 58 |
-
"peak_reserved_gb":
|
| 59 |
-
"end_allocated_gb":
|
| 60 |
-
"end_reserved_gb":
|
| 61 |
},
|
| 62 |
-
"speed_delta_pct":
|
| 63 |
},
|
| 64 |
{
|
| 65 |
"id": "P03",
|
|
@@ -72,7 +72,7 @@
|
|
| 72 |
"seed": 103,
|
| 73 |
"prompt": "A richly detailed Victorian workshop where a brass clockwork automaton is being repaired under green banker lamps, with tiny gears, pearl inlays, oiled leather belts, smoke from a soldering iron, magnifying glass distortion, and handwritten labels everywhere. The main blueprint title must read \"AUTOMATON HAND ASSEMBLY REV. C\", a drawer label says \"SPRINGS / EYES / MEMORY CAMS\", a dangling tag says \"CLIENT: LADY ADA\", and a note pinned to the wall says \"DO NOT WIND PAST MIDNIGHT\". Moody chiaroscuro, shallow depth of field, extremely fine mechanical detail.",
|
| 74 |
"image": "assets/comparison/p03_original.png",
|
| 75 |
-
"time_s":
|
| 76 |
"peak_allocated_gb": 23.244,
|
| 77 |
"peak_reserved_gb": 25.438,
|
| 78 |
"end_allocated_gb": 20.841,
|
|
@@ -83,14 +83,14 @@
|
|
| 83 |
"title": "Victorian Automaton Repair",
|
| 84 |
"seed": 103,
|
| 85 |
"prompt": "A richly detailed Victorian workshop where a brass clockwork automaton is being repaired under green banker lamps, with tiny gears, pearl inlays, oiled leather belts, smoke from a soldering iron, magnifying glass distortion, and handwritten labels everywhere. The main blueprint title must read \"AUTOMATON HAND ASSEMBLY REV. C\", a drawer label says \"SPRINGS / EYES / MEMORY CAMS\", a dangling tag says \"CLIENT: LADY ADA\", and a note pinned to the wall says \"DO NOT WIND PAST MIDNIGHT\". Moody chiaroscuro, shallow depth of field, extremely fine mechanical detail.",
|
| 86 |
-
"image": "assets/comparison/
|
| 87 |
-
"time_s":
|
| 88 |
-
"peak_allocated_gb":
|
| 89 |
-
"peak_reserved_gb":
|
| 90 |
-
"end_allocated_gb":
|
| 91 |
-
"end_reserved_gb":
|
| 92 |
},
|
| 93 |
-
"speed_delta_pct":
|
| 94 |
},
|
| 95 |
{
|
| 96 |
"id": "P04",
|
|
@@ -103,7 +103,7 @@
|
|
| 103 |
"seed": 104,
|
| 104 |
"prompt": "A believable Mars greenhouse control room at dawn, red dust outside the curved windows, rows of tomatoes and dwarf wheat under violet grow lights, condensation on transparent tubes, a tired botanist reflected in a touchscreen, and several readable UI panels in English: \"OXYGEN LOOP STABLE\", \"WATER RECOVERY 98.4%\", \"SECTOR C: POLLINATION DRONES ACTIVE\", and a sticky note saying \"Tell Earth the basil survived\". Technical but warm, high resolution, realistic sci-fi, detailed glass and plant textures.",
|
| 105 |
"image": "assets/comparison/p04_original.png",
|
| 106 |
-
"time_s": 1.
|
| 107 |
"peak_allocated_gb": 23.242,
|
| 108 |
"peak_reserved_gb": 25.438,
|
| 109 |
"end_allocated_gb": 20.841,
|
|
@@ -114,14 +114,14 @@
|
|
| 114 |
"title": "Mars Greenhouse Control Room",
|
| 115 |
"seed": 104,
|
| 116 |
"prompt": "A believable Mars greenhouse control room at dawn, red dust outside the curved windows, rows of tomatoes and dwarf wheat under violet grow lights, condensation on transparent tubes, a tired botanist reflected in a touchscreen, and several readable UI panels in English: \"OXYGEN LOOP STABLE\", \"WATER RECOVERY 98.4%\", \"SECTOR C: POLLINATION DRONES ACTIVE\", and a sticky note saying \"Tell Earth the basil survived\". Technical but warm, high resolution, realistic sci-fi, detailed glass and plant textures.",
|
| 117 |
-
"image": "assets/comparison/
|
| 118 |
-
"time_s":
|
| 119 |
-
"peak_allocated_gb":
|
| 120 |
-
"peak_reserved_gb":
|
| 121 |
-
"end_allocated_gb":
|
| 122 |
-
"end_reserved_gb":
|
| 123 |
},
|
| 124 |
-
"speed_delta_pct":
|
| 125 |
},
|
| 126 |
{
|
| 127 |
"id": "P05",
|
|
@@ -134,7 +134,7 @@
|
|
| 134 |
"seed": 105,
|
| 135 |
"prompt": "An abandoned underground railway platform turned into an accidental archive of travel posters, peeling ceramic tiles, puddles reflecting amber emergency lights, old suitcases, vines growing through cracked concrete, and five large posters with distinct readable titles: \"THE NORTHERN COMET EXPRESS\", \"SLEEPER TO ISTANBUL\", \"MIDNIGHT PLATFORM 7\", \"COASTAL ROUTE REOPENING SOON\", and \"KEEP YOUR TICKET VISIBLE\". Cinematic composition, wet surfaces, layered typography, realistic grime, strong perspective down the tracks.",
|
| 136 |
"image": "assets/comparison/p05_original.png",
|
| 137 |
-
"time_s": 1.
|
| 138 |
"peak_allocated_gb": 23.242,
|
| 139 |
"peak_reserved_gb": 25.438,
|
| 140 |
"end_allocated_gb": 20.841,
|
|
@@ -145,14 +145,14 @@
|
|
| 145 |
"title": "Lost Railway Poster Wall",
|
| 146 |
"seed": 105,
|
| 147 |
"prompt": "An abandoned underground railway platform turned into an accidental archive of travel posters, peeling ceramic tiles, puddles reflecting amber emergency lights, old suitcases, vines growing through cracked concrete, and five large posters with distinct readable titles: \"THE NORTHERN COMET EXPRESS\", \"SLEEPER TO ISTANBUL\", \"MIDNIGHT PLATFORM 7\", \"COASTAL ROUTE REOPENING SOON\", and \"KEEP YOUR TICKET VISIBLE\". Cinematic composition, wet surfaces, layered typography, realistic grime, strong perspective down the tracks.",
|
| 148 |
-
"image": "assets/comparison/
|
| 149 |
-
"time_s":
|
| 150 |
-
"peak_allocated_gb":
|
| 151 |
-
"peak_reserved_gb":
|
| 152 |
-
"end_allocated_gb":
|
| 153 |
-
"end_reserved_gb":
|
| 154 |
},
|
| 155 |
-
"speed_delta_pct":
|
| 156 |
},
|
| 157 |
{
|
| 158 |
"id": "P06",
|
|
@@ -165,7 +165,7 @@
|
|
| 165 |
"seed": 106,
|
| 166 |
"prompt": "A hyperreal macro photograph of a miniature courtroom diorama built inside an antique wooden drawer, with tiny judge bench, brass lamps, dust motes, paper exhibits smaller than postage stamps, a mouse-sized witness chair, and readable text on tiny documents: a case file labeled \"CASE 1842-B: THE MISSING ORRERY\", an evidence tag saying \"EXHIBIT C\", a court calendar reading \"HEARING AT 9:30\", and a placard on the judge bench saying \"TRUTH IN SMALL THINGS\". Macro lens, tactile materials, careful scale cues.",
|
| 167 |
"image": "assets/comparison/p06_original.png",
|
| 168 |
-
"time_s": 1.
|
| 169 |
"peak_allocated_gb": 23.244,
|
| 170 |
"peak_reserved_gb": 25.438,
|
| 171 |
"end_allocated_gb": 20.841,
|
|
@@ -176,14 +176,14 @@
|
|
| 176 |
"title": "Miniature Courtroom Diorama",
|
| 177 |
"seed": 106,
|
| 178 |
"prompt": "A hyperreal macro photograph of a miniature courtroom diorama built inside an antique wooden drawer, with tiny judge bench, brass lamps, dust motes, paper exhibits smaller than postage stamps, a mouse-sized witness chair, and readable text on tiny documents: a case file labeled \"CASE 1842-B: THE MISSING ORRERY\", an evidence tag saying \"EXHIBIT C\", a court calendar reading \"HEARING AT 9:30\", and a placard on the judge bench saying \"TRUTH IN SMALL THINGS\". Macro lens, tactile materials, careful scale cues.",
|
| 179 |
-
"image": "assets/comparison/
|
| 180 |
-
"time_s":
|
| 181 |
-
"peak_allocated_gb":
|
| 182 |
-
"peak_reserved_gb":
|
| 183 |
-
"end_allocated_gb":
|
| 184 |
-
"end_reserved_gb":
|
| 185 |
},
|
| 186 |
-
"speed_delta_pct":
|
| 187 |
},
|
| 188 |
{
|
| 189 |
"id": "P07",
|
|
@@ -196,7 +196,7 @@
|
|
| 196 |
"seed": 107,
|
| 197 |
"prompt": "A cozy but complex rainy evening scene in a narrow Seoul book cafe, viewed through a window covered in raindrops, shelves packed with art books, two students annotating a map, a barista steaming milk, warm tungsten light, street reflections, and multiple readable English text elements: a chalkboard says \"TONIGHT: QUIET READING CLUB\", a receipt says \"OAT LATTE / CINNAMON BUN\", a book spine says \"ARCHITECTURE OF DREAMS\", and a window sticker says \"OPEN UNTIL THE LAST TRAIN\". Photorealistic, cinematic, intricate reflections.",
|
| 198 |
"image": "assets/comparison/p07_original.png",
|
| 199 |
-
"time_s": 1.
|
| 200 |
"peak_allocated_gb": 23.244,
|
| 201 |
"peak_reserved_gb": 25.438,
|
| 202 |
"end_allocated_gb": 20.841,
|
|
@@ -207,14 +207,14 @@
|
|
| 207 |
"title": "Rainy Seoul Book Cafe",
|
| 208 |
"seed": 107,
|
| 209 |
"prompt": "A cozy but complex rainy evening scene in a narrow Seoul book cafe, viewed through a window covered in raindrops, shelves packed with art books, two students annotating a map, a barista steaming milk, warm tungsten light, street reflections, and multiple readable English text elements: a chalkboard says \"TONIGHT: QUIET READING CLUB\", a receipt says \"OAT LATTE / CINNAMON BUN\", a book spine says \"ARCHITECTURE OF DREAMS\", and a window sticker says \"OPEN UNTIL THE LAST TRAIN\". Photorealistic, cinematic, intricate reflections.",
|
| 210 |
-
"image": "assets/comparison/
|
| 211 |
-
"time_s":
|
| 212 |
-
"peak_allocated_gb":
|
| 213 |
-
"peak_reserved_gb":
|
| 214 |
-
"end_allocated_gb":
|
| 215 |
-
"end_reserved_gb":
|
| 216 |
},
|
| 217 |
-
"speed_delta_pct":
|
| 218 |
},
|
| 219 |
{
|
| 220 |
"id": "P08",
|
|
@@ -227,7 +227,7 @@
|
|
| 227 |
"seed": 108,
|
| 228 |
"prompt": "A dramatic captain's table aboard a storm-tossed oceanographic research vessel, with a wet nautical chart, brass dividers, sonar printouts, bioluminescent plankton glowing in a glass jar, a cracked tablet, and readable labels distributed across the image: \"TRENCH SURVEY LINE B\", \"DEPTH 10,928m\", \"ROV SIGNAL WEAK\", \"SAMPLE: BLUE VENT WATER\", and a torn note saying \"If the lights pulse twice, turn back\". High detail, realistic water droplets, dark blue-green atmosphere, sharp text.",
|
| 229 |
"image": "assets/comparison/p08_original.png",
|
| 230 |
-
"time_s": 1.
|
| 231 |
"peak_allocated_gb": 23.244,
|
| 232 |
"peak_reserved_gb": 25.438,
|
| 233 |
"end_allocated_gb": 20.841,
|
|
@@ -238,14 +238,14 @@
|
|
| 238 |
"title": "Oceanographic Expedition Map",
|
| 239 |
"seed": 108,
|
| 240 |
"prompt": "A dramatic captain's table aboard a storm-tossed oceanographic research vessel, with a wet nautical chart, brass dividers, sonar printouts, bioluminescent plankton glowing in a glass jar, a cracked tablet, and readable labels distributed across the image: \"TRENCH SURVEY LINE B\", \"DEPTH 10,928m\", \"ROV SIGNAL WEAK\", \"SAMPLE: BLUE VENT WATER\", and a torn note saying \"If the lights pulse twice, turn back\". High detail, realistic water droplets, dark blue-green atmosphere, sharp text.",
|
| 241 |
-
"image": "assets/comparison/
|
| 242 |
-
"time_s":
|
| 243 |
-
"peak_allocated_gb":
|
| 244 |
-
"peak_reserved_gb":
|
| 245 |
-
"end_allocated_gb":
|
| 246 |
-
"end_reserved_gb":
|
| 247 |
},
|
| 248 |
-
"speed_delta_pct":
|
| 249 |
},
|
| 250 |
{
|
| 251 |
"id": "P09",
|
|
@@ -258,7 +258,7 @@
|
|
| 258 |
"seed": 109,
|
| 259 |
"prompt": "An alternate-history Renaissance laboratory where an astronomer-painter is combining oil pigments with early electrical apparatus, with celestial globes, copper coils, stained glass sunlight, anatomical sketches, a half-finished portrait, and Latin-English notebook text visible on several pages: \"LIGHT STUDY: BLUE VERDITER\", \"GALVANIC TEST NO. 8\", \"VENUS RISES BEFORE DAWN\", and a folded letter sealed in wax reading \"FOR THE WORKSHOP MASTER ONLY\". Painterly realism, ornate detail, coherent objects, readable calligraphy.",
|
| 260 |
"image": "assets/comparison/p09_original.png",
|
| 261 |
-
"time_s": 1.
|
| 262 |
"peak_allocated_gb": 23.242,
|
| 263 |
"peak_reserved_gb": 25.438,
|
| 264 |
"end_allocated_gb": 20.841,
|
|
@@ -269,14 +269,14 @@
|
|
| 269 |
"title": "Renaissance Lab Notebook",
|
| 270 |
"seed": 109,
|
| 271 |
"prompt": "An alternate-history Renaissance laboratory where an astronomer-painter is combining oil pigments with early electrical apparatus, with celestial globes, copper coils, stained glass sunlight, anatomical sketches, a half-finished portrait, and Latin-English notebook text visible on several pages: \"LIGHT STUDY: BLUE VERDITER\", \"GALVANIC TEST NO. 8\", \"VENUS RISES BEFORE DAWN\", and a folded letter sealed in wax reading \"FOR THE WORKSHOP MASTER ONLY\". Painterly realism, ornate detail, coherent objects, readable calligraphy.",
|
| 272 |
-
"image": "assets/comparison/
|
| 273 |
-
"time_s":
|
| 274 |
-
"peak_allocated_gb":
|
| 275 |
-
"peak_reserved_gb":
|
| 276 |
-
"end_allocated_gb":
|
| 277 |
-
"end_reserved_gb":
|
| 278 |
},
|
| 279 |
-
"speed_delta_pct":
|
| 280 |
},
|
| 281 |
{
|
| 282 |
"id": "P10",
|
|
@@ -289,7 +289,7 @@
|
|
| 289 |
"seed": 110,
|
| 290 |
"prompt": "Сложная фотореалистичная сцена в старой провинциальной типографии поздним зимним вечером: за большим деревянным столом лежат металлические литеры, корректурные листы, линейки, чашка крепкого чая, заснеженное окно, тусклая лампа и следы типографской краски на пальцах наборщика. На разных элементах изображения должен быть длинный и хорошо читаемый русский текст: на вывеске над дверью написано \"ТИПОГРАФИЯ СЕВЕРНЫЙ ЛИСТОК\", на корректуре заголовок \"СРОЧНО В НОМЕР: ГОРОДСКОЙ СОВЕТ ОТКРЫВАЕТ НОВУЮ БИБЛИОТЕКУ\", на маленькой записке фраза \"Проверить букву Ё во втором абзаце\", а на календаре дата \"Пятница, 24 января\". Много бытовых деталей, глубокие тени, реалистичная кириллица, никакой размытой каши вместо текста.",
|
| 291 |
"image": "assets/comparison/p10_original.png",
|
| 292 |
-
"time_s": 3.
|
| 293 |
"peak_allocated_gb": 23.252,
|
| 294 |
"peak_reserved_gb": 25.459,
|
| 295 |
"end_allocated_gb": 20.841,
|
|
@@ -300,13 +300,13 @@
|
|
| 300 |
"title": "Russian Provincial Print Shop",
|
| 301 |
"seed": 110,
|
| 302 |
"prompt": "Сложная фотореалистичная сцена в старой провинциальной типографии поздним зимним вечером: за большим деревянным столом лежат металлические литеры, корректурные листы, линейки, чашка крепкого чая, заснеженное окно, тусклая лампа и следы типографской краски на пальцах наборщика. На разных элементах изображения должен быть длинный и хорошо читаемый русский текст: на вывеске над дверью написано \"ТИПОГРАФИЯ СЕВЕРНЫЙ ЛИСТОК\", на корректуре заголовок \"СРОЧНО В НОМЕР: ГОРОДСКОЙ СОВЕТ ОТКРЫВАЕТ НОВУЮ БИБЛИОТЕКУ\", на маленькой записке фраза \"Проверить букву Ё во втором абзаце\", а на календаре дата \"Пятница, 24 января\". Много бытовых деталей, глубокие тени, реалистичная кириллица, никакой размытой каши вместо текста.",
|
| 303 |
-
"image": "assets/comparison/
|
| 304 |
-
"time_s":
|
| 305 |
-
"peak_allocated_gb":
|
| 306 |
-
"peak_reserved_gb":
|
| 307 |
-
"end_allocated_gb":
|
| 308 |
-
"end_reserved_gb":
|
| 309 |
},
|
| 310 |
-
"speed_delta_pct": -
|
| 311 |
}
|
| 312 |
]
|
|
|
|
| 10 |
"seed": 101,
|
| 11 |
"prompt": "A dense cinematic night market inside a transparent orbital habitat, with Earth curving below the glass floor, vendors selling glowing algae noodles and tiny repair drones, rain droplets floating in zero gravity, reflections on wet metal, and at least six readable signs in different places: a vertical neon sign saying \"ORBITAL TEA HOUSE\", a handwritten chalk menu saying \"NO GRAVITY REFUNDS\", a yellow safety placard saying \"MAG BOOTS REQUIRED\", a small receipt printer label saying \"BAY 12 PICKUP\", a red banner saying \"FRESH SYNTH-MANGO\", and a blue customs notice saying \"DECLARE ALL MOON ROCKS\". Ultra detailed, wide angle, layered crowd, realistic lens flare, crisp small typography.",
|
| 12 |
"image": "assets/comparison/p01_original.png",
|
| 13 |
+
"time_s": 1.579,
|
| 14 |
"peak_allocated_gb": 23.245,
|
| 15 |
"peak_reserved_gb": 25.438,
|
| 16 |
"end_allocated_gb": 20.841,
|
|
|
|
| 21 |
"title": "Orbital Night Market",
|
| 22 |
"seed": 101,
|
| 23 |
"prompt": "A dense cinematic night market inside a transparent orbital habitat, with Earth curving below the glass floor, vendors selling glowing algae noodles and tiny repair drones, rain droplets floating in zero gravity, reflections on wet metal, and at least six readable signs in different places: a vertical neon sign saying \"ORBITAL TEA HOUSE\", a handwritten chalk menu saying \"NO GRAVITY REFUNDS\", a yellow safety placard saying \"MAG BOOTS REQUIRED\", a small receipt printer label saying \"BAY 12 PICKUP\", a red banner saying \"FRESH SYNTH-MANGO\", and a blue customs notice saying \"DECLARE ALL MOON ROCKS\". Ultra detailed, wide angle, layered crowd, realistic lens flare, crisp small typography.",
|
| 24 |
+
"image": "assets/comparison/p01_sdnq_uint4_static_fixed.png",
|
| 25 |
+
"time_s": 2.268,
|
| 26 |
+
"peak_allocated_gb": 19.585,
|
| 27 |
+
"peak_reserved_gb": 21.57,
|
| 28 |
+
"end_allocated_gb": 17.181,
|
| 29 |
+
"end_reserved_gb": 21.57
|
| 30 |
},
|
| 31 |
+
"speed_delta_pct": 43.6
|
| 32 |
},
|
| 33 |
{
|
| 34 |
"id": "P02",
|
|
|
|
| 41 |
"seed": 102,
|
| 42 |
"prompt": "A top-down documentary photo of an Arctic climate research desk inside a weather station during a blizzard, with ice crystals on the window, a rugged laptop displaying a complex map, three paper field notebooks, sample vials, a steaming enamel mug, and long English text on multiple objects: the notebook cover reads \"FIELD LOG: STATION NORD, WEEK 17\", a whiteboard in the background reads \"CORE DEPTH 42.8m / TEMP -31C / WIND 62 km/h\", a red tag on a sample tube reads \"DO NOT THAW\", and a printed memo reads \"CALIBRATE SENSORS BEFORE SUNRISE\". Natural cold light, precise shadows, photorealistic texture, no blurry text.",
|
| 43 |
"image": "assets/comparison/p02_original.png",
|
| 44 |
+
"time_s": 1.37,
|
| 45 |
"peak_allocated_gb": 23.245,
|
| 46 |
"peak_reserved_gb": 25.434,
|
| 47 |
"end_allocated_gb": 20.841,
|
|
|
|
| 52 |
"title": "Arctic Research Desk",
|
| 53 |
"seed": 102,
|
| 54 |
"prompt": "A top-down documentary photo of an Arctic climate research desk inside a weather station during a blizzard, with ice crystals on the window, a rugged laptop displaying a complex map, three paper field notebooks, sample vials, a steaming enamel mug, and long English text on multiple objects: the notebook cover reads \"FIELD LOG: STATION NORD, WEEK 17\", a whiteboard in the background reads \"CORE DEPTH 42.8m / TEMP -31C / WIND 62 km/h\", a red tag on a sample tube reads \"DO NOT THAW\", and a printed memo reads \"CALIBRATE SENSORS BEFORE SUNRISE\". Natural cold light, precise shadows, photorealistic texture, no blurry text.",
|
| 55 |
+
"image": "assets/comparison/p02_sdnq_uint4_static_fixed.png",
|
| 56 |
+
"time_s": 4.307,
|
| 57 |
+
"peak_allocated_gb": 19.585,
|
| 58 |
+
"peak_reserved_gb": 21.615,
|
| 59 |
+
"end_allocated_gb": 17.181,
|
| 60 |
+
"end_reserved_gb": 21.615
|
| 61 |
},
|
| 62 |
+
"speed_delta_pct": 214.4
|
| 63 |
},
|
| 64 |
{
|
| 65 |
"id": "P03",
|
|
|
|
| 72 |
"seed": 103,
|
| 73 |
"prompt": "A richly detailed Victorian workshop where a brass clockwork automaton is being repaired under green banker lamps, with tiny gears, pearl inlays, oiled leather belts, smoke from a soldering iron, magnifying glass distortion, and handwritten labels everywhere. The main blueprint title must read \"AUTOMATON HAND ASSEMBLY REV. C\", a drawer label says \"SPRINGS / EYES / MEMORY CAMS\", a dangling tag says \"CLIENT: LADY ADA\", and a note pinned to the wall says \"DO NOT WIND PAST MIDNIGHT\". Moody chiaroscuro, shallow depth of field, extremely fine mechanical detail.",
|
| 74 |
"image": "assets/comparison/p03_original.png",
|
| 75 |
+
"time_s": 3.19,
|
| 76 |
"peak_allocated_gb": 23.244,
|
| 77 |
"peak_reserved_gb": 25.438,
|
| 78 |
"end_allocated_gb": 20.841,
|
|
|
|
| 83 |
"title": "Victorian Automaton Repair",
|
| 84 |
"seed": 103,
|
| 85 |
"prompt": "A richly detailed Victorian workshop where a brass clockwork automaton is being repaired under green banker lamps, with tiny gears, pearl inlays, oiled leather belts, smoke from a soldering iron, magnifying glass distortion, and handwritten labels everywhere. The main blueprint title must read \"AUTOMATON HAND ASSEMBLY REV. C\", a drawer label says \"SPRINGS / EYES / MEMORY CAMS\", a dangling tag says \"CLIENT: LADY ADA\", and a note pinned to the wall says \"DO NOT WIND PAST MIDNIGHT\". Moody chiaroscuro, shallow depth of field, extremely fine mechanical detail.",
|
| 86 |
+
"image": "assets/comparison/p03_sdnq_uint4_static_fixed.png",
|
| 87 |
+
"time_s": 4.111,
|
| 88 |
+
"peak_allocated_gb": 19.585,
|
| 89 |
+
"peak_reserved_gb": 21.619,
|
| 90 |
+
"end_allocated_gb": 17.181,
|
| 91 |
+
"end_reserved_gb": 21.619
|
| 92 |
},
|
| 93 |
+
"speed_delta_pct": 28.9
|
| 94 |
},
|
| 95 |
{
|
| 96 |
"id": "P04",
|
|
|
|
| 103 |
"seed": 104,
|
| 104 |
"prompt": "A believable Mars greenhouse control room at dawn, red dust outside the curved windows, rows of tomatoes and dwarf wheat under violet grow lights, condensation on transparent tubes, a tired botanist reflected in a touchscreen, and several readable UI panels in English: \"OXYGEN LOOP STABLE\", \"WATER RECOVERY 98.4%\", \"SECTOR C: POLLINATION DRONES ACTIVE\", and a sticky note saying \"Tell Earth the basil survived\". Technical but warm, high resolution, realistic sci-fi, detailed glass and plant textures.",
|
| 105 |
"image": "assets/comparison/p04_original.png",
|
| 106 |
+
"time_s": 1.191,
|
| 107 |
"peak_allocated_gb": 23.242,
|
| 108 |
"peak_reserved_gb": 25.438,
|
| 109 |
"end_allocated_gb": 20.841,
|
|
|
|
| 114 |
"title": "Mars Greenhouse Control Room",
|
| 115 |
"seed": 104,
|
| 116 |
"prompt": "A believable Mars greenhouse control room at dawn, red dust outside the curved windows, rows of tomatoes and dwarf wheat under violet grow lights, condensation on transparent tubes, a tired botanist reflected in a touchscreen, and several readable UI panels in English: \"OXYGEN LOOP STABLE\", \"WATER RECOVERY 98.4%\", \"SECTOR C: POLLINATION DRONES ACTIVE\", and a sticky note saying \"Tell Earth the basil survived\". Technical but warm, high resolution, realistic sci-fi, detailed glass and plant textures.",
|
| 117 |
+
"image": "assets/comparison/p04_sdnq_uint4_static_fixed.png",
|
| 118 |
+
"time_s": 4.094,
|
| 119 |
+
"peak_allocated_gb": 19.582,
|
| 120 |
+
"peak_reserved_gb": 21.6,
|
| 121 |
+
"end_allocated_gb": 17.181,
|
| 122 |
+
"end_reserved_gb": 21.6
|
| 123 |
},
|
| 124 |
+
"speed_delta_pct": 243.7
|
| 125 |
},
|
| 126 |
{
|
| 127 |
"id": "P05",
|
|
|
|
| 134 |
"seed": 105,
|
| 135 |
"prompt": "An abandoned underground railway platform turned into an accidental archive of travel posters, peeling ceramic tiles, puddles reflecting amber emergency lights, old suitcases, vines growing through cracked concrete, and five large posters with distinct readable titles: \"THE NORTHERN COMET EXPRESS\", \"SLEEPER TO ISTANBUL\", \"MIDNIGHT PLATFORM 7\", \"COASTAL ROUTE REOPENING SOON\", and \"KEEP YOUR TICKET VISIBLE\". Cinematic composition, wet surfaces, layered typography, realistic grime, strong perspective down the tracks.",
|
| 136 |
"image": "assets/comparison/p05_original.png",
|
| 137 |
+
"time_s": 1.195,
|
| 138 |
"peak_allocated_gb": 23.242,
|
| 139 |
"peak_reserved_gb": 25.438,
|
| 140 |
"end_allocated_gb": 20.841,
|
|
|
|
| 145 |
"title": "Lost Railway Poster Wall",
|
| 146 |
"seed": 105,
|
| 147 |
"prompt": "An abandoned underground railway platform turned into an accidental archive of travel posters, peeling ceramic tiles, puddles reflecting amber emergency lights, old suitcases, vines growing through cracked concrete, and five large posters with distinct readable titles: \"THE NORTHERN COMET EXPRESS\", \"SLEEPER TO ISTANBUL\", \"MIDNIGHT PLATFORM 7\", \"COASTAL ROUTE REOPENING SOON\", and \"KEEP YOUR TICKET VISIBLE\". Cinematic composition, wet surfaces, layered typography, realistic grime, strong perspective down the tracks.",
|
| 148 |
+
"image": "assets/comparison/p05_sdnq_uint4_static_fixed.png",
|
| 149 |
+
"time_s": 3.672,
|
| 150 |
+
"peak_allocated_gb": 19.582,
|
| 151 |
+
"peak_reserved_gb": 21.6,
|
| 152 |
+
"end_allocated_gb": 17.181,
|
| 153 |
+
"end_reserved_gb": 21.6
|
| 154 |
},
|
| 155 |
+
"speed_delta_pct": 207.3
|
| 156 |
},
|
| 157 |
{
|
| 158 |
"id": "P06",
|
|
|
|
| 165 |
"seed": 106,
|
| 166 |
"prompt": "A hyperreal macro photograph of a miniature courtroom diorama built inside an antique wooden drawer, with tiny judge bench, brass lamps, dust motes, paper exhibits smaller than postage stamps, a mouse-sized witness chair, and readable text on tiny documents: a case file labeled \"CASE 1842-B: THE MISSING ORRERY\", an evidence tag saying \"EXHIBIT C\", a court calendar reading \"HEARING AT 9:30\", and a placard on the judge bench saying \"TRUTH IN SMALL THINGS\". Macro lens, tactile materials, careful scale cues.",
|
| 167 |
"image": "assets/comparison/p06_original.png",
|
| 168 |
+
"time_s": 1.188,
|
| 169 |
"peak_allocated_gb": 23.244,
|
| 170 |
"peak_reserved_gb": 25.438,
|
| 171 |
"end_allocated_gb": 20.841,
|
|
|
|
| 176 |
"title": "Miniature Courtroom Diorama",
|
| 177 |
"seed": 106,
|
| 178 |
"prompt": "A hyperreal macro photograph of a miniature courtroom diorama built inside an antique wooden drawer, with tiny judge bench, brass lamps, dust motes, paper exhibits smaller than postage stamps, a mouse-sized witness chair, and readable text on tiny documents: a case file labeled \"CASE 1842-B: THE MISSING ORRERY\", an evidence tag saying \"EXHIBIT C\", a court calendar reading \"HEARING AT 9:30\", and a placard on the judge bench saying \"TRUTH IN SMALL THINGS\". Macro lens, tactile materials, careful scale cues.",
|
| 179 |
+
"image": "assets/comparison/p06_sdnq_uint4_static_fixed.png",
|
| 180 |
+
"time_s": 3.577,
|
| 181 |
+
"peak_allocated_gb": 19.584,
|
| 182 |
+
"peak_reserved_gb": 21.619,
|
| 183 |
+
"end_allocated_gb": 17.181,
|
| 184 |
+
"end_reserved_gb": 21.619
|
| 185 |
},
|
| 186 |
+
"speed_delta_pct": 201.1
|
| 187 |
},
|
| 188 |
{
|
| 189 |
"id": "P07",
|
|
|
|
| 196 |
"seed": 107,
|
| 197 |
"prompt": "A cozy but complex rainy evening scene in a narrow Seoul book cafe, viewed through a window covered in raindrops, shelves packed with art books, two students annotating a map, a barista steaming milk, warm tungsten light, street reflections, and multiple readable English text elements: a chalkboard says \"TONIGHT: QUIET READING CLUB\", a receipt says \"OAT LATTE / CINNAMON BUN\", a book spine says \"ARCHITECTURE OF DREAMS\", and a window sticker says \"OPEN UNTIL THE LAST TRAIN\". Photorealistic, cinematic, intricate reflections.",
|
| 198 |
"image": "assets/comparison/p07_original.png",
|
| 199 |
+
"time_s": 1.19,
|
| 200 |
"peak_allocated_gb": 23.244,
|
| 201 |
"peak_reserved_gb": 25.438,
|
| 202 |
"end_allocated_gb": 20.841,
|
|
|
|
| 207 |
"title": "Rainy Seoul Book Cafe",
|
| 208 |
"seed": 107,
|
| 209 |
"prompt": "A cozy but complex rainy evening scene in a narrow Seoul book cafe, viewed through a window covered in raindrops, shelves packed with art books, two students annotating a map, a barista steaming milk, warm tungsten light, street reflections, and multiple readable English text elements: a chalkboard says \"TONIGHT: QUIET READING CLUB\", a receipt says \"OAT LATTE / CINNAMON BUN\", a book spine says \"ARCHITECTURE OF DREAMS\", and a window sticker says \"OPEN UNTIL THE LAST TRAIN\". Photorealistic, cinematic, intricate reflections.",
|
| 210 |
+
"image": "assets/comparison/p07_sdnq_uint4_static_fixed.png",
|
| 211 |
+
"time_s": 3.597,
|
| 212 |
+
"peak_allocated_gb": 19.585,
|
| 213 |
+
"peak_reserved_gb": 21.619,
|
| 214 |
+
"end_allocated_gb": 17.181,
|
| 215 |
+
"end_reserved_gb": 21.619
|
| 216 |
},
|
| 217 |
+
"speed_delta_pct": 202.3
|
| 218 |
},
|
| 219 |
{
|
| 220 |
"id": "P08",
|
|
|
|
| 227 |
"seed": 108,
|
| 228 |
"prompt": "A dramatic captain's table aboard a storm-tossed oceanographic research vessel, with a wet nautical chart, brass dividers, sonar printouts, bioluminescent plankton glowing in a glass jar, a cracked tablet, and readable labels distributed across the image: \"TRENCH SURVEY LINE B\", \"DEPTH 10,928m\", \"ROV SIGNAL WEAK\", \"SAMPLE: BLUE VENT WATER\", and a torn note saying \"If the lights pulse twice, turn back\". High detail, realistic water droplets, dark blue-green atmosphere, sharp text.",
|
| 229 |
"image": "assets/comparison/p08_original.png",
|
| 230 |
+
"time_s": 1.184,
|
| 231 |
"peak_allocated_gb": 23.244,
|
| 232 |
"peak_reserved_gb": 25.438,
|
| 233 |
"end_allocated_gb": 20.841,
|
|
|
|
| 238 |
"title": "Oceanographic Expedition Map",
|
| 239 |
"seed": 108,
|
| 240 |
"prompt": "A dramatic captain's table aboard a storm-tossed oceanographic research vessel, with a wet nautical chart, brass dividers, sonar printouts, bioluminescent plankton glowing in a glass jar, a cracked tablet, and readable labels distributed across the image: \"TRENCH SURVEY LINE B\", \"DEPTH 10,928m\", \"ROV SIGNAL WEAK\", \"SAMPLE: BLUE VENT WATER\", and a torn note saying \"If the lights pulse twice, turn back\". High detail, realistic water droplets, dark blue-green atmosphere, sharp text.",
|
| 241 |
+
"image": "assets/comparison/p08_sdnq_uint4_static_fixed.png",
|
| 242 |
+
"time_s": 3.695,
|
| 243 |
+
"peak_allocated_gb": 19.584,
|
| 244 |
+
"peak_reserved_gb": 21.619,
|
| 245 |
+
"end_allocated_gb": 17.181,
|
| 246 |
+
"end_reserved_gb": 21.619
|
| 247 |
},
|
| 248 |
+
"speed_delta_pct": 212.1
|
| 249 |
},
|
| 250 |
{
|
| 251 |
"id": "P09",
|
|
|
|
| 258 |
"seed": 109,
|
| 259 |
"prompt": "An alternate-history Renaissance laboratory where an astronomer-painter is combining oil pigments with early electrical apparatus, with celestial globes, copper coils, stained glass sunlight, anatomical sketches, a half-finished portrait, and Latin-English notebook text visible on several pages: \"LIGHT STUDY: BLUE VERDITER\", \"GALVANIC TEST NO. 8\", \"VENUS RISES BEFORE DAWN\", and a folded letter sealed in wax reading \"FOR THE WORKSHOP MASTER ONLY\". Painterly realism, ornate detail, coherent objects, readable calligraphy.",
|
| 260 |
"image": "assets/comparison/p09_original.png",
|
| 261 |
+
"time_s": 1.197,
|
| 262 |
"peak_allocated_gb": 23.242,
|
| 263 |
"peak_reserved_gb": 25.438,
|
| 264 |
"end_allocated_gb": 20.841,
|
|
|
|
| 269 |
"title": "Renaissance Lab Notebook",
|
| 270 |
"seed": 109,
|
| 271 |
"prompt": "An alternate-history Renaissance laboratory where an astronomer-painter is combining oil pigments with early electrical apparatus, with celestial globes, copper coils, stained glass sunlight, anatomical sketches, a half-finished portrait, and Latin-English notebook text visible on several pages: \"LIGHT STUDY: BLUE VERDITER\", \"GALVANIC TEST NO. 8\", \"VENUS RISES BEFORE DAWN\", and a folded letter sealed in wax reading \"FOR THE WORKSHOP MASTER ONLY\". Painterly realism, ornate detail, coherent objects, readable calligraphy.",
|
| 272 |
+
"image": "assets/comparison/p09_sdnq_uint4_static_fixed.png",
|
| 273 |
+
"time_s": 3.648,
|
| 274 |
+
"peak_allocated_gb": 19.582,
|
| 275 |
+
"peak_reserved_gb": 21.6,
|
| 276 |
+
"end_allocated_gb": 17.181,
|
| 277 |
+
"end_reserved_gb": 21.6
|
| 278 |
},
|
| 279 |
+
"speed_delta_pct": 204.8
|
| 280 |
},
|
| 281 |
{
|
| 282 |
"id": "P10",
|
|
|
|
| 289 |
"seed": 110,
|
| 290 |
"prompt": "Сложная фотореалистичная сцена в старой провинциальной типографии поздним зимним вечером: за большим деревянным столом лежат металлические литеры, корректурные листы, линейки, чашка крепкого чая, заснеженное окно, тусклая лампа и следы типографской краски на пальцах наборщика. На разных элементах изображения должен быть длинный и хорошо читаемый русский текст: на вывеске над дверью написано \"ТИПОГРАФИЯ СЕВЕРНЫЙ ЛИСТОК\", на корректуре заголовок \"СРОЧНО В НОМЕР: ГОРОДСКОЙ СОВЕТ ОТКРЫВАЕТ НОВУЮ БИБЛИОТЕКУ\", на маленькой записке фраза \"Проверить букву Ё во втором абзаце\", а на календаре дата \"Пятница, 24 января\". Много бытовых деталей, глубокие тени, реалистичная кириллица, никакой размытой каши вместо текста.",
|
| 291 |
"image": "assets/comparison/p10_original.png",
|
| 292 |
+
"time_s": 3.993,
|
| 293 |
"peak_allocated_gb": 23.252,
|
| 294 |
"peak_reserved_gb": 25.459,
|
| 295 |
"end_allocated_gb": 20.841,
|
|
|
|
| 300 |
"title": "Russian Provincial Print Shop",
|
| 301 |
"seed": 110,
|
| 302 |
"prompt": "Сложная фотореалистичная сцена в старой провинциальной типографии поздним зимним вечером: за большим деревянным столом лежат металлические литеры, корректурные листы, линейки, чашка крепкого чая, заснеженное окно, тусклая лампа и следы типографской краски на пальцах наборщика. На разных элементах изображения должен быть длинный и хорошо читаемый русский текст: на вывеске над дверью написано \"ТИПОГРАФИЯ СЕВЕРНЫЙ ЛИСТОК\", на корректуре заголовок \"СРОЧНО В НОМЕР: ГОРОДСКОЙ СОВЕТ ОТКРЫВАЕТ НОВУЮ БИБЛИОТЕКУ\", на маленькой записке фраза \"Проверить букву Ё во втором абзаце\", а на календаре дата \"Пятница, 24 января\". Много бытовых деталей, глубокие тени, реалистичная кириллица, никакой размытой каши вместо текста.",
|
| 303 |
+
"image": "assets/comparison/p10_sdnq_uint4_static_fixed.png",
|
| 304 |
+
"time_s": 3.664,
|
| 305 |
+
"peak_allocated_gb": 19.593,
|
| 306 |
+
"peak_reserved_gb": 21.619,
|
| 307 |
+
"end_allocated_gb": 17.181,
|
| 308 |
+
"end_reserved_gb": 21.619
|
| 309 |
},
|
| 310 |
+
"speed_delta_pct": -8.2
|
| 311 |
}
|
| 312 |
]
|
sdnq_quantization_summary.json
CHANGED
|
@@ -1,21 +1,25 @@
|
|
| 1 |
{
|
| 2 |
"source_model": "microsoft/Lens-Turbo",
|
| 3 |
-
"method": "SDNQ
|
| 4 |
-
"scope": "transformer only",
|
| 5 |
-
"
|
| 6 |
-
"vae": "kept in bfloat16",
|
| 7 |
"config": {
|
| 8 |
"weights_dtype": "uint4",
|
| 9 |
"quantized_matmul_dtype": "int8",
|
| 10 |
-
"group_size":
|
| 11 |
-
"use_static_quantization":
|
| 12 |
-
"use_dynamic_quantization":
|
| 13 |
-
"use_quantized_matmul":
|
| 14 |
-
"use_svd":
|
| 15 |
-
"use_hadamard":
|
| 16 |
-
"quant_conv":
|
| 17 |
-
"quant_embedding":
|
| 18 |
-
"dequantize_fp32":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
"quantization_device": "cuda",
|
| 20 |
"return_device": "cuda"
|
| 21 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"source_model": "microsoft/Lens-Turbo",
|
| 3 |
+
"method": "SDNQ uint4 static",
|
| 4 |
+
"scope": "transformer only, excluding modulation linears",
|
| 5 |
+
"ablation_fix": "Transformer block img_mod and txt_mod linears are left in bfloat16 because UINT4 quantization caused periodic grid artifacts and severe text degradation.",
|
|
|
|
| 6 |
"config": {
|
| 7 |
"weights_dtype": "uint4",
|
| 8 |
"quantized_matmul_dtype": "int8",
|
| 9 |
+
"group_size": 0,
|
| 10 |
+
"use_static_quantization": true,
|
| 11 |
+
"use_dynamic_quantization": false,
|
| 12 |
+
"use_quantized_matmul": true,
|
| 13 |
+
"use_svd": false,
|
| 14 |
+
"use_hadamard": false,
|
| 15 |
+
"quant_conv": false,
|
| 16 |
+
"quant_embedding": false,
|
| 17 |
+
"dequantize_fp32": false,
|
| 18 |
+
"modules_to_not_convert": [
|
| 19 |
+
"*.img_mod.*",
|
| 20 |
+
"*.txt_mod.*"
|
| 21 |
+
],
|
| 22 |
+
"modules_to_not_use_matmul": [],
|
| 23 |
"quantization_device": "cuda",
|
| 24 |
"return_device": "cuda"
|
| 25 |
}
|
transformer/config.json
CHANGED
|
@@ -28,31 +28,33 @@
|
|
| 28 |
"modules_dtype_dict": {},
|
| 29 |
"modules_quant_config": {},
|
| 30 |
"modules_to_not_convert": [
|
| 31 |
-
"wte",
|
| 32 |
-
"norm",
|
| 33 |
-
".img_out",
|
| 34 |
-
"multi_modal_projector",
|
| 35 |
-
".norm_out",
|
| 36 |
-
"patch_embedding",
|
| 37 |
-
".img_in",
|
| 38 |
-
".context_embedder",
|
| 39 |
-
".txt_out",
|
| 40 |
-
"patch_embed",
|
| 41 |
-
".vid_in",
|
| 42 |
".final_layer",
|
| 43 |
-
"time_text_embed",
|
| 44 |
"pos_embed",
|
| 45 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
".vid_out",
|
| 47 |
-
".
|
|
|
|
|
|
|
|
|
|
| 48 |
".t_embedder",
|
| 49 |
-
"
|
| 50 |
"patch_emb",
|
| 51 |
-
"
|
| 52 |
-
".
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
".proj_out",
|
| 54 |
-
".
|
| 55 |
-
".
|
|
|
|
|
|
|
|
|
|
| 56 |
".emb_in",
|
| 57 |
"txt_norm.0.weight",
|
| 58 |
"txt_norm.1.weight",
|
|
|
|
| 28 |
"modules_dtype_dict": {},
|
| 29 |
"modules_quant_config": {},
|
| 30 |
"modules_to_not_convert": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
".final_layer",
|
|
|
|
| 32 |
"pos_embed",
|
| 33 |
+
".norm_out",
|
| 34 |
+
".y_embedder",
|
| 35 |
+
".context_embedder",
|
| 36 |
+
".condition_embedder",
|
| 37 |
+
".x_embedder",
|
| 38 |
".vid_out",
|
| 39 |
+
".emb_out",
|
| 40 |
+
".img_in",
|
| 41 |
+
"patch_embed",
|
| 42 |
+
".time_embed",
|
| 43 |
".t_embedder",
|
| 44 |
+
"multi_modal_projector",
|
| 45 |
"patch_emb",
|
| 46 |
+
"norm",
|
| 47 |
+
".img_out",
|
| 48 |
+
"patch_embedding",
|
| 49 |
+
"*.img_mod.*",
|
| 50 |
+
"*.txt_mod.*",
|
| 51 |
+
"lm_head",
|
| 52 |
".proj_out",
|
| 53 |
+
".vid_in",
|
| 54 |
+
".txt_in",
|
| 55 |
+
"wte",
|
| 56 |
+
"time_text_embed",
|
| 57 |
+
".txt_out",
|
| 58 |
".emb_in",
|
| 59 |
"txt_norm.0.weight",
|
| 60 |
"txt_norm.1.weight",
|
transformer/diffusion_pytorch_model-00001-of-00002.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa57170473fdfdd6e6951567129c7c414c891f4053a38ca3f78f27236787935c
|
| 3 |
+
size 3987262976
|
transformer/diffusion_pytorch_model-00002-of-00002.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8bdc89e8a85ac091627b8f7db9d0f4009c0da299dab6b53b38bbd0e63b2bb6d0
|
| 3 |
+
size 314450920
|
transformer/diffusion_pytorch_model.safetensors.index.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
transformer/quantization_config.json
CHANGED
|
@@ -9,31 +9,33 @@
|
|
| 9 |
"modules_dtype_dict": {},
|
| 10 |
"modules_quant_config": {},
|
| 11 |
"modules_to_not_convert": [
|
| 12 |
-
"wte",
|
| 13 |
-
"norm",
|
| 14 |
-
".img_out",
|
| 15 |
-
"multi_modal_projector",
|
| 16 |
-
".norm_out",
|
| 17 |
-
"patch_embedding",
|
| 18 |
-
".img_in",
|
| 19 |
-
".context_embedder",
|
| 20 |
-
".txt_out",
|
| 21 |
-
"patch_embed",
|
| 22 |
-
".vid_in",
|
| 23 |
".final_layer",
|
| 24 |
-
"time_text_embed",
|
| 25 |
"pos_embed",
|
| 26 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
".vid_out",
|
| 28 |
-
".
|
|
|
|
|
|
|
|
|
|
| 29 |
".t_embedder",
|
| 30 |
-
"
|
| 31 |
"patch_emb",
|
| 32 |
-
"
|
| 33 |
-
".
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
".proj_out",
|
| 35 |
-
".
|
| 36 |
-
".
|
|
|
|
|
|
|
|
|
|
| 37 |
".emb_in",
|
| 38 |
"txt_norm.0.weight",
|
| 39 |
"txt_norm.1.weight",
|
|
|
|
| 9 |
"modules_dtype_dict": {},
|
| 10 |
"modules_quant_config": {},
|
| 11 |
"modules_to_not_convert": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
".final_layer",
|
|
|
|
| 13 |
"pos_embed",
|
| 14 |
+
".norm_out",
|
| 15 |
+
".y_embedder",
|
| 16 |
+
".context_embedder",
|
| 17 |
+
".condition_embedder",
|
| 18 |
+
".x_embedder",
|
| 19 |
".vid_out",
|
| 20 |
+
".emb_out",
|
| 21 |
+
".img_in",
|
| 22 |
+
"patch_embed",
|
| 23 |
+
".time_embed",
|
| 24 |
".t_embedder",
|
| 25 |
+
"multi_modal_projector",
|
| 26 |
"patch_emb",
|
| 27 |
+
"norm",
|
| 28 |
+
".img_out",
|
| 29 |
+
"patch_embedding",
|
| 30 |
+
"*.img_mod.*",
|
| 31 |
+
"*.txt_mod.*",
|
| 32 |
+
"lm_head",
|
| 33 |
".proj_out",
|
| 34 |
+
".vid_in",
|
| 35 |
+
".txt_in",
|
| 36 |
+
"wte",
|
| 37 |
+
"time_text_embed",
|
| 38 |
+
".txt_out",
|
| 39 |
".emb_in",
|
| 40 |
"txt_norm.0.weight",
|
| 41 |
"txt_norm.1.weight",
|