asdf98
/

LiquidGen

Model card | Files and versions

xet

Community

asdf98 committed 8 days ago

Commit

2403335

verified ·

1 Parent(s): 1aa5ac1

Fix: add trust_remote_code=True for datasets with legacy loading scripts

Browse files

Files changed (1) hide show

train.py +16 -16

train.py CHANGED Viewed

@@ -38,6 +38,7 @@ DATASET_PRESETS = {
         "image_column": "image",
         "label_column": "labels",
         "num_classes": 27,
         "description": "~200 painting samples, 27 styles, 1.7MB — instant smoke test",
     },
     "paintings": {
@@ -46,6 +47,7 @@ DATASET_PRESETS = {
         "image_column": "image",
         "label_column": "labels",
         "num_classes": 27,
         "description": "~8K paintings, 27 styles, 204MB — best for style-conditional training",
     },
     "cartoon": {
@@ -90,18 +92,16 @@ class TrainConfig:
     max_images: int = 0                # 0 = use all, >0 = limit (for streaming/testing)
     # VAE — fully open, no login needed
-    # madebyollin/sdxl-vae-fp16-fix: SDXL VAE with fp16 NaN fix
-    # 4 latent channels, 8x spatial compression, scaling_factor=0.13025
     vae_id: str = "madebyollin/sdxl-vae-fp16-fix"
     vae_scaling_factor: float = 0.13025
     latent_channels: int = 4
     # Training
-    batch_size: int = 32               # Can be large since training on cached tensors!
     gradient_accumulation_steps: int = 1
     learning_rate: float = 1e-4
     weight_decay: float = 0.01
-    max_grad_norm: float = 2.0         # Critical for stability (ZigMa paper)
     num_epochs: int = 100
     warmup_steps: int = 500
     ema_decay: float = 0.9999
@@ -190,7 +190,7 @@ def precache_latents(config, cache_path=None):
     os.makedirs(os.path.dirname(cache_path) if os.path.dirname(cache_path) else ".", exist_ok=True)
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    # Load VAE — no subfolder, no auth needed
     print(f"Loading VAE: {config.vae_id} (open, no login needed)...")
     from diffusers import AutoencoderKL
     vae = AutoencoderKL.from_pretrained(
@@ -213,6 +213,9 @@ def precache_latents(config, cache_path=None):
         ds_kwargs["name"] = preset["config"]
     if is_streaming:
         ds_kwargs["streaming"] = True
     dataset = load_dataset(preset["name"], **ds_kwargs)
@@ -252,7 +255,7 @@ def precache_latents(config, cache_path=None):
             with torch.no_grad():
                 px = torch.stack(batch_pixels).to(device, dtype=torch.float16) * 2 - 1
                 lat = vae.encode(px).latent_dist.sample()
-                lat = lat * config.vae_scaling_factor  # SDXL: scale only, no shift
                 all_latents.append(lat.cpu().float())
             all_labels.extend(batch_labels)
             batch_pixels, batch_labels = [], []
@@ -273,7 +276,7 @@ def precache_latents(config, cache_path=None):
     elapsed = time.time() - t0
     mb = os.path.getsize(cache_path) / 1024**2
-    print(f"\n✅ Cached {count} latents → {cache_path}")
     print(f"   Shape: {all_latents.shape}, Size: {mb:.1f}MB, Time: {elapsed:.0f}s")
     del vae
@@ -376,9 +379,9 @@ def train(config):
     train_dl = DataLoader(train_ds, batch_size=config.batch_size, shuffle=True,
                           num_workers=config.num_workers, pin_memory=True, drop_last=True)
-    # Step 3: Model (in_channels=4 for SDXL VAE)
     mcfg = get_model_config(config.model_size, config.num_classes, config.class_drop_prob)
-    mcfg["in_channels"] = config.latent_channels  # 4 for SDXL VAE
     model = LiquidGen(**mcfg).to(device)
     print(f"LiquidGen-{config.model_size}: {model.count_params()/1e6:.1f}M params")
@@ -440,10 +443,9 @@ def train(config):
                           f"lr={lr:.2e} | vram={vram:.1f}G | {sps:.1f} st/s")
                     la = 0.0
                     if math.isnan(al) or al > 50:
-                        print("💥 Diverged!"); return
                 if gs % config.sample_every_n_steps == 0:
-                    # Load VAE lazily (only for decoding samples)
                     if not vae_loaded:
                         from diffusers import AutoencoderKL
                         vae = AutoencoderKL.from_pretrained(
@@ -454,16 +456,14 @@ def train(config):
                     ema.apply(model); model.eval()
                     sl = torch.randint(0, max(1, config.num_classes), (config.num_samples,),
                                        device=device) if config.num_classes > 0 else None
-                    # 4 channels for SDXL VAE
                     samp = fm.sample(model, (config.num_samples, config.latent_channels, lat_size, lat_size),
                                      device, config.num_sample_steps, sl, config.cfg_scale)
                     with torch.no_grad():
-                        # SDXL VAE: unscale only, no shift
                         dec = samp.half() / config.vae_scaling_factor
                         imgs = ((vae.decode(dec).sample + 1) / 2).clamp(0, 1).float()
                     from torchvision.utils import save_image
                     sp = f"{config.output_dir}/samples/step_{gs:07d}.png"
-                    save_image(imgs, sp, nrow=2); print(f"  📸 {sp}")
                     ema.restore(model); model.train()
                 if gs % config.save_every_n_steps == 0:
@@ -471,14 +471,14 @@ def train(config):
                     torch.save({"model": model.state_dict(), "ema": ema.shadow,
                                 "optimizer": opt.state_dict(), "scheduler": sched.state_dict(),
                                 "step": gs, "epoch": epoch, "model_config": mcfg}, cp)
-                    print(f"  💾 {cp}")
         print(f"Epoch {epoch} | {time.time()-et:.0f}s\n")
     final = f"{config.output_dir}/checkpoints/final.pt"
     torch.save({"model": model.state_dict(), "ema": ema.shadow,
                 "model_config": mcfg, "step": gs}, final)
-    print(f"\n🎉 Done! {gs} steps, {(time.time()-t_start)/60:.1f}min -> {final}")
 if __name__ == "__main__":

         "image_column": "image",
         "label_column": "labels",
         "num_classes": 27,
+        "trust_remote_code": True,
         "description": "~200 painting samples, 27 styles, 1.7MB — instant smoke test",
     },
     "paintings": {
         "image_column": "image",
         "label_column": "labels",
         "num_classes": 27,
+        "trust_remote_code": True,
         "description": "~8K paintings, 27 styles, 204MB — best for style-conditional training",
     },
     "cartoon": {
     max_images: int = 0                # 0 = use all, >0 = limit (for streaming/testing)
     # VAE — fully open, no login needed
     vae_id: str = "madebyollin/sdxl-vae-fp16-fix"
     vae_scaling_factor: float = 0.13025
     latent_channels: int = 4
     # Training
+    batch_size: int = 32
     gradient_accumulation_steps: int = 1
     learning_rate: float = 1e-4
     weight_decay: float = 0.01
+    max_grad_norm: float = 2.0
     num_epochs: int = 100
     warmup_steps: int = 500
     ema_decay: float = 0.9999
     os.makedirs(os.path.dirname(cache_path) if os.path.dirname(cache_path) else ".", exist_ok=True)
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    # Load VAE
     print(f"Loading VAE: {config.vae_id} (open, no login needed)...")
     from diffusers import AutoencoderKL
     vae = AutoencoderKL.from_pretrained(
         ds_kwargs["name"] = preset["config"]
     if is_streaming:
         ds_kwargs["streaming"] = True
+    # Some datasets have legacy loading scripts that need this flag
+    if preset.get("trust_remote_code", False):
+        ds_kwargs["trust_remote_code"] = True
     dataset = load_dataset(preset["name"], **ds_kwargs)
             with torch.no_grad():
                 px = torch.stack(batch_pixels).to(device, dtype=torch.float16) * 2 - 1
                 lat = vae.encode(px).latent_dist.sample()
+                lat = lat * config.vae_scaling_factor
                 all_latents.append(lat.cpu().float())
             all_labels.extend(batch_labels)
             batch_pixels, batch_labels = [], []
     elapsed = time.time() - t0
     mb = os.path.getsize(cache_path) / 1024**2
+    print(f"\n✅ Cached {count} latents -> {cache_path}")
     print(f"   Shape: {all_latents.shape}, Size: {mb:.1f}MB, Time: {elapsed:.0f}s")
     del vae
     train_dl = DataLoader(train_ds, batch_size=config.batch_size, shuffle=True,
                           num_workers=config.num_workers, pin_memory=True, drop_last=True)
+    # Step 3: Model
     mcfg = get_model_config(config.model_size, config.num_classes, config.class_drop_prob)
+    mcfg["in_channels"] = config.latent_channels
     model = LiquidGen(**mcfg).to(device)
     print(f"LiquidGen-{config.model_size}: {model.count_params()/1e6:.1f}M params")
                           f"lr={lr:.2e} | vram={vram:.1f}G | {sps:.1f} st/s")
                     la = 0.0
                     if math.isnan(al) or al > 50:
+                        print("Diverged!"); return
                 if gs % config.sample_every_n_steps == 0:
                     if not vae_loaded:
                         from diffusers import AutoencoderKL
                         vae = AutoencoderKL.from_pretrained(
                     ema.apply(model); model.eval()
                     sl = torch.randint(0, max(1, config.num_classes), (config.num_samples,),
                                        device=device) if config.num_classes > 0 else None
                     samp = fm.sample(model, (config.num_samples, config.latent_channels, lat_size, lat_size),
                                      device, config.num_sample_steps, sl, config.cfg_scale)
                     with torch.no_grad():
                         dec = samp.half() / config.vae_scaling_factor
                         imgs = ((vae.decode(dec).sample + 1) / 2).clamp(0, 1).float()
                     from torchvision.utils import save_image
                     sp = f"{config.output_dir}/samples/step_{gs:07d}.png"
+                    save_image(imgs, sp, nrow=2); print(f"  Saved: {sp}")
                     ema.restore(model); model.train()
                 if gs % config.save_every_n_steps == 0:
                     torch.save({"model": model.state_dict(), "ema": ema.shadow,
                                 "optimizer": opt.state_dict(), "scheduler": sched.state_dict(),
                                 "step": gs, "epoch": epoch, "model_config": mcfg}, cp)
+                    print(f"  Saved: {cp}")
         print(f"Epoch {epoch} | {time.time()-et:.0f}s\n")
     final = f"{config.output_dir}/checkpoints/final.pt"
     torch.save({"model": model.state_dict(), "ema": ema.shadow,
                 "model_config": mcfg, "step": gs}, final)
+    print(f"\nDone! {gs} steps, {(time.time()-t_start)/60:.1f}min -> {final}")
 if __name__ == "__main__":