Commit: "Fix: SDXL VAE (no login), streaming dataset, step-based training"
[Browse files]
Changed file: train.py (CHANGED)
|
@@ -44,12 +44,12 @@ class TrainConfig:

Before (removed lines marked "-"):

    44        max_samples: int = 0          # 0 = use all (only for non-streaming)
    45        streaming_buffer: int = 1000  # Shuffle buffer for streaming
    46
    47    -   # VAE
    48    -   vae_id: str = "…"                  [value truncated in page extraction]
    49    -   vae_subfolder: str = "…"           [value truncated in page extraction]
    50        vae_dtype: str = "float16"
    51    -   vae_scaling_factor: float = 0.…    [value truncated in page extraction]
    52    -   vae_shift_factor: float = 0.…      [value truncated in page extraction]
    53
    54        # Training
    55        batch_size: int = 8
@@ -323,8 +323,11 @@ def train(config: TrainConfig):

Before (removed lines marked "-"):

    323       print("Loading VAE...")
    324       from diffusers import AutoencoderKL
    325       vae_dtype = torch.float16 if config.vae_dtype == "float16" else torch.bfloat16
    326       vae = AutoencoderKL.from_pretrained(
    327   -       config.vae_id,
    328       ).to(device).eval()
    329       for p in vae.parameters():
    330           p.requires_grad_(False)
@@ -448,7 +451,8 @@ def train(config: TrainConfig):

Before (removed lines marked "-"):

    448       sample_labels = None
    449       if config.num_classes > 0:
    450           sample_labels = torch.randint(0, config.num_classes, (config.num_samples,), device=device)
    451   -   [removed line's content missing from page extraction — presumably the old
               `sampled = fm.sample(...)` call, replaced by the two added lines in the
               "After" side of this hunk; confirm against the repository diff]
    452               device, config.num_sample_steps, sample_labels, config.cfg_scale)
    453       sample_imgs = decode_latents_with_vae(sampled.to(vae_dtype), vae,
    454           config.vae_scaling_factor, config.vae_shift_factor).float()
After (added lines marked "+"):

    44        max_samples: int = 0          # 0 = use all (only for non-streaming)
    45        streaming_buffer: int = 1000  # Shuffle buffer for streaming
    46
    47    +   # VAE (SDXL VAE - open access, no login needed, fp16-safe)
    48    +   vae_id: str = "madebyollin/sdxl-vae-fp16-fix"
    49    +   vae_subfolder: str = ""
    50        vae_dtype: str = "float16"
    51    +   vae_scaling_factor: float = 0.13025
    52    +   vae_shift_factor: float = 0.0  # SDXL VAE has no shift
    53
    54        # Training
    55        batch_size: int = 8
After (added lines marked "+"):

    323       print("Loading VAE...")
    324       from diffusers import AutoencoderKL
    325       vae_dtype = torch.float16 if config.vae_dtype == "float16" else torch.bfloat16
    326   +   vae_kwargs = {"torch_dtype": vae_dtype}
    327   +   if config.vae_subfolder:
    328   +       vae_kwargs["subfolder"] = config.vae_subfolder
    329       vae = AutoencoderKL.from_pretrained(
    330   +       config.vae_id, **vae_kwargs
    331       ).to(device).eval()
    332       for p in vae.parameters():
    333           p.requires_grad_(False)
After (added lines marked "+"):

    451       sample_labels = None
    452       if config.num_classes > 0:
    453           sample_labels = torch.randint(0, config.num_classes, (config.num_samples,), device=device)
    454   +   latent_ch = vae.config.latent_channels  # 4 for SDXL, 16 for Flux
    455   +   sampled = fm.sample(model, (config.num_samples, latent_ch, latent_size, latent_size),
    456               device, config.num_sample_steps, sample_labels, config.cfg_scale)
    457       sample_imgs = decode_latents_with_vae(sampled.to(vae_dtype), vae,
    458           config.vae_scaling_factor, config.vae_shift_factor).float()