Text-to-Image
Diffusers
Safetensors
LensPipeline
lens
sdnq
uint4
static-quantization
ablation
model-cpu-offload
Instructions to use WaveCut/Lens-SDNQ-uint4-static with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Diffusers
How to use WaveCut/Lens-SDNQ-uint4-static with Diffusers:
pip install -U diffusers transformers accelerate
```python
import torch
from diffusers import DiffusionPipeline

# switch to "mps" for Apple devices
pipe = DiffusionPipeline.from_pretrained("WaveCut/Lens-SDNQ-uint4-static", dtype=torch.bfloat16, device_map="cuda")

prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
image = pipe(prompt).images[0]
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- Draw Things
- DiffusionBee
```json
{
  "source_model": "microsoft/Lens",
  "target_model": "WaveCut/Lens-SDNQ-uint4-static",
  "method": "SDNQ uint4 static",
  "corrected_recipe": true,
  "weights_dtype": "uint4",
  "quantized_matmul_dtype": "int8",
  "group_size": 0,
  "use_quantized_matmul": true,
  "dequantize_fp32": false,
  "modules_to_not_convert_user": [
    ".final_layer",
    "pos_embed",
    ".norm_out",
    ".y_embedder",
    ".context_embedder",
    ".condition_embedder",
    ".x_embedder",
    ".vid_out",
    ".emb_out",
    ".img_in",
    "patch_embed",
    ".time_embed",
    ".t_embedder",
    "multi_modal_projector",
    "patch_emb",
    "norm",
    ".img_out",
    "patch_embedding",
    "lm_head",
    ".proj_out",
    ".vid_in",
    ".txt_in",
    "wte",
    "time_text_embed",
    ".txt_out",
    ".emb_in",
    "*.img_mod.*",
    "*.txt_mod.*"
  ],
  "root_cause_from_turbo_ablation": "Do not quantize transformer modulation linears (*.img_mod.* and *.txt_mod.*); all-linear UINT4 caused periodic grid artifacts and text degradation on Lens-Turbo.",
  "transformer_load_time_s": 3.677,
  "transformer_load_peak_allocated_gb": 8.359,
  "transformer_load_peak_reserved_gb": 8.424,
  "quantization_time_s": 0.313,
  "quantization_peak_allocated_gb": 8.425,
  "quantization_peak_reserved_gb": 8.485,
  "base_transformer_tensor_storage_gb": 16.417,
  "quant_transformer_tensor_storage_gb": 4.301,
  "transformer_storage_reduction_percent": 73.8,
  "base_transformer_repo_files_gb": 16.417,
  "quant_transformer_repo_files_gb": 4.302,
  "base_transformer_dtypes": {
    "FLOAT32": 16416900608
  },
  "quant_transformer_dtypes": {
    "BFLOAT16": 2942501632,
    "UINT8": 1358954496
  },
  "base_transformer_tensors": 1264,
  "quant_transformer_tensors": 2224
}
```