Spaces:

pearsonkyle
/

SDXL-Model-Merger

Sleeping

App Files Community

Kyle Pearson committed 29 days ago

Commit

b89e643

1 Parent(s): d723e62

Replace optimum-quanto dependency with torchao, add quantizer support, fix safetensors export (quantized checkpoints are saved as .pt via torch.save), improve error handling, update UI docs

Browse files

Files changed (3) hide show

requirements.txt +1 -1
src/exporter.py +69 -54
src/ui/exporter_tab.py +3 -2

requirements.txt CHANGED Viewed

@@ -25,7 +25,7 @@ huggingface-hub>=0.23.0
 psutil>=5.9.0
 # Optional: quantization support
-optimum-quanto>=0.2.0
 # ZeroGPU support for HuggingFace Spaces
 spaces

 psutil>=5.9.0
 # Optional: quantization support
+torchao>=0.4.0
 # ZeroGPU support for HuggingFace Spaces
 spaces

src/exporter.py CHANGED Viewed

@@ -8,6 +8,35 @@ from .config import SCRIPT_DIR
 from .gpu_decorator import GPU
 @GPU(duration=180)
 def _extract_and_save(pipe, include_lora, quantize, qtype, save_format):
     """GPU-decorated helper that extracts weights and saves the model."""
@@ -17,64 +46,57 @@ def _extract_and_save(pipe, include_lora, quantize, qtype, save_format):
         except Exception as e:
             print(f"  ℹ️ Could not unload LoRAs: {e}")
     merged_state_dict = {}
     # Extract UNet weights
     for k, v in pipe.unet.state_dict().items():
-        merged_state_dict[f"unet.{k}"] = v.contiguous().half()
     # Extract text encoder weights
     if pipe.text_encoder is not None:
         for k, v in pipe.text_encoder.state_dict().items():
-            merged_state_dict[f"text_encoder.{k}"] = v.contiguous().half()
     if pipe.text_encoder_2 is not None:
         for k, v in pipe.text_encoder_2.state_dict().items():
-            merged_state_dict[f"text_encoder_2.{k}"] = v.contiguous().half()
     # Extract VAE weights
     if pipe.vae is not None:
         for k, v in pipe.vae.state_dict().items():
-            merged_state_dict[f"first_stage_model.{k}"] = v.contiguous().half()
-    # Quantize if requested
-    try:
-        from optimum.quanto import quantize as quanto_quantize, QTensor
-        QUANTO_AVAILABLE = True
-    except ImportError:
-        QUANTO_AVAILABLE = False
-    if quantize and qtype != "none" and QUANTO_AVAILABLE:
-        class FakeModel(torch.nn.Module):
-            pass
-        fake_model = FakeModel()
-        fake_model.__dict__.update(merged_state_dict)
-        if qtype == "int8":
-            from optimum.quanto import int8_weight_only
-            quanto_quantize(fake_model, int8_weight_only())
-        elif qtype == "int4":
-            from optimum.quanto import int4_weight_only
-            quanto_quantize(fake_model, int4_weight_only())
-        elif qtype == "float8":
-            from optimum.quanto import float8_dynamic_activation_float8_weight
-            quanto_quantize(fake_model, float8_dynamic_activation_float8_weight())
-        else:
-            raise ValueError(f"Unsupported qtype: {qtype}")
-        merged_state_dict = {
-            k: v.dequantize().half() if isinstance(v, QTensor) else v
-            for k, v in fake_model.state_dict().items()
-        }
-    elif quantize and not QUANTO_AVAILABLE:
-        raise ImportError("optimum.quanto not installed. Install with: pip install optimum-quanto")
     # Save model
     ext = ".bin" if save_format == "bin" else ".safetensors"
     prefix = f"{qtype}_" if quantize and qtype != "none" else ""
     out_path = SCRIPT_DIR / f"merged_{prefix}checkpoint{ext}"
-    if ext == ".bin":
         torch.save(merged_state_dict, str(out_path))
     else:
         save_file(merged_state_dict, str(out_path))
@@ -97,27 +119,20 @@ def export_merged_model(
         qtype: Quantization type - 'none', 'int8', 'int4', or 'float8'
         save_format: Output format - 'safetensors' or 'bin'
-    Yields:
-        Tuple of (status_message, progress_text) at each export stage.
     Returns:
-        Final yielded tuple of (output_path or None, status message)
     """
     # Fetch the pipeline at call time — avoids the stale import-by-value problem.
     pipe = config.get_pipe()
     if not pipe:
-        yield None, "⚠️ Please load a pipeline first."
-        return
     try:
         # Validate quantization type
         valid_qtypes = ("none", "int8", "int4", "float8")
         if qtype not in valid_qtypes:
-            yield None, f"❌ Invalid quantization type: {qtype}. Must be one of: {valid_qtypes}"
-            return
-        yield "💾 Exporting model...", "Extracting and saving weights..."
         out_path = _extract_and_save(pipe, include_lora, quantize, qtype, save_format)
@@ -128,20 +143,20 @@ def export_merged_model(
         else:
             msg = f"✅ Merged checkpoint saved: `{out_path}` ({size_gb:.2f} GB)"
-        yield str(out_path), msg
     except ImportError as e:
-        yield None, f"❌ Missing dependency: {str(e)}"
     except Exception as e:
         import traceback
         print(traceback.format_exc())
-        yield None, f"❌ Export failed: {str(e)}"
 def get_export_status() -> str:
     """Get current export capability status."""
     try:
-        from optimum.quanto import quantize
-        return "✅ optimum.quanto available for quantization"
     except ImportError:
-        return "ℹ️ Install optimum-quanto for quantization support"

 from .gpu_decorator import GPU
+def _quantize_model(model, qtype: str):
+    """Apply torchao quantization to a model using quantize_."""
+    from torchao.quantization import quantize_
+    if qtype == "int8":
+        from torchao.quantization import Int8WeightOnlyConfig
+        print("  ⚙️ Quantizing with int8_weight_only...")
+        config = Int8WeightOnlyConfig()
+        quantize_(model, config)
+    elif qtype == "int4":
+        from torchao.quantization import Int4WeightOnlyConfig
+        print("  ⚙️ Quantizing with int4_weight_only (group_size=32)...")
+        config = Int4WeightOnlyConfig(group_size=32)
+        quantize_(model, config)
+    elif qtype == "float8":
+        from torchao.quantization import Float8DynamicActivationFloat8WeightConfig
+        print("  ⚙️ Quantizing with float8_dynamic_activation_float8_weight...")
+        config = Float8DynamicActivationFloat8WeightConfig()
+        quantize_(model, config)
+    else:
+        raise ValueError(f"Unsupported qtype: {qtype}. Must be one of: int8, int4, float8")
 @GPU(duration=180)
 def _extract_and_save(pipe, include_lora, quantize, qtype, save_format):
     """GPU-decorated helper that extracts weights and saves the model."""
         except Exception as e:
             print(f"  ℹ️ Could not unload LoRAs: {e}")
+    # Quantize components in-place before extracting state dicts
+    if quantize and qtype != "none":
+        _quantize_model(pipe.unet, qtype)
+        # torchao quantized tensors cannot be saved with safetensors, use torch.save instead
+        # Don't dequantize - keep the quantized format for smaller file size
     merged_state_dict = {}
     # Extract UNet weights
     for k, v in pipe.unet.state_dict().items():
+        # For quantized tensors, save directly; otherwise convert to half
+        if hasattr(v, 'dequantize'):
+            # Keep quantized tensor as-is for smaller file size
+            merged_state_dict[f"unet.{k}"] = v
+        else:
+            merged_state_dict[f"unet.{k}"] = v.contiguous().half()
     # Extract text encoder weights
     if pipe.text_encoder is not None:
         for k, v in pipe.text_encoder.state_dict().items():
+            if hasattr(v, 'dequantize'):
+                merged_state_dict[f"text_encoder.{k}"] = v
+            else:
+                merged_state_dict[f"text_encoder.{k}"] = v.contiguous().half()
     if pipe.text_encoder_2 is not None:
         for k, v in pipe.text_encoder_2.state_dict().items():
+            if hasattr(v, 'dequantize'):
+                merged_state_dict[f"text_encoder_2.{k}"] = v
+            else:
+                merged_state_dict[f"text_encoder_2.{k}"] = v.contiguous().half()
     # Extract VAE weights
     if pipe.vae is not None:
         for k, v in pipe.vae.state_dict().items():
+            if hasattr(v, 'dequantize'):
+                merged_state_dict[f"first_stage_model.{k}"] = v
+            else:
+                merged_state_dict[f"first_stage_model.{k}"] = v.contiguous().half()
     # Save model
     ext = ".bin" if save_format == "bin" else ".safetensors"
     prefix = f"{qtype}_" if quantize and qtype != "none" else ""
     out_path = SCRIPT_DIR / f"merged_{prefix}checkpoint{ext}"
+    if quantize and qtype != "none":
+        # torchao quantized tensors are not compatible with safetensors
+        # Use torch.save instead which preserves the quantization format
+        ext = ".pt"
+        out_path = SCRIPT_DIR / f"merged_{qtype}_checkpoint.pt"
+        torch.save(merged_state_dict, str(out_path))
+    elif ext == ".bin":
         torch.save(merged_state_dict, str(out_path))
     else:
         save_file(merged_state_dict, str(out_path))
         qtype: Quantization type - 'none', 'int8', 'int4', or 'float8'
         save_format: Output format - 'safetensors' or 'bin'
     Returns:
+        Tuple of (output_path or None, status message)
     """
     # Fetch the pipeline at call time — avoids the stale import-by-value problem.
     pipe = config.get_pipe()
     if not pipe:
+        return None, "⚠️ Please load a pipeline first."
     try:
         # Validate quantization type
         valid_qtypes = ("none", "int8", "int4", "float8")
         if qtype not in valid_qtypes:
+            return None, f"❌ Invalid quantization type: {qtype}. Must be one of: {valid_qtypes}"
         out_path = _extract_and_save(pipe, include_lora, quantize, qtype, save_format)
         else:
             msg = f"✅ Merged checkpoint saved: `{out_path}` ({size_gb:.2f} GB)"
+        return str(out_path), msg
     except ImportError as e:
+        return None, f"❌ Missing dependency: {str(e)}"
     except Exception as e:
         import traceback
         print(traceback.format_exc())
+        return None, f"❌ Export failed: {str(e)}"
 def get_export_status() -> str:
     """Get current export capability status."""
     try:
+        from torchao.quantization import quantize_, Int4WeightOnlyConfig, Int8WeightOnlyConfig, Float8DynamicActivationFloat8WeightConfig
+        return "✅ torchao available for quantization"
     except ImportError:
+        return "ℹ️ Install torchao for quantization support: pip install torchao"

src/ui/exporter_tab.py CHANGED Viewed

@@ -62,8 +62,9 @@ def create_exporter_tab():
                     <div style="margin-top: 16px; padding: 12px; background: #e0f2fe; border-radius: 8px;">
                         <strong>ℹ️ About Quantization:</strong>
                         <p style="font-size: 0.9em; margin: 8px 0;">
-                            Reduces model size by lowering precision. Int8 is typically
-                            lossless for inference while cutting size in half.
                         </p>
                     </div>
                 """)

                     <div style="margin-top: 16px; padding: 12px; background: #e0f2fe; border-radius: 8px;">
                         <strong>ℹ️ About Quantization:</strong>
                         <p style="font-size: 0.9em; margin: 8px 0;">
+                            Reduces model size by lowering precision using torchao.
+                            Int8 is typically lossless for inference while cutting size in half.
+                            Int4 provides maximum compression with minimal quality loss.
                         </p>
                     </div>
                 """)