asdf98
/

LiquidGen

asdf98 committed 8 days ago

Commit

a733be1

verified ·

1 Parent(s): 1373ccf

Fix: use SDXL VAE (4ch, no login needed)

Files changed (1) hide show

model.py CHANGED Viewed

@@ -2,7 +2,7 @@
 LiquidGen: A Novel Liquid Neural Network Image Generation Model
 Architecture Overview:
-- Frozen VAE encoder/decoder (FLUX.1-schnell, 16ch latent, 8x compression)
 - Liquid backbone for denoising (fully parallelizable, no attention, no sequential ODE)
 - Flow matching training objective (velocity prediction)
@@ -305,7 +305,7 @@ class LiquidGen(nn.Module):
     def __init__(
         self,
-        in_channels: int = 16,
         patch_size: int = 2,
         embed_dim: int = 512,
         depth: int = 16,
@@ -385,7 +385,7 @@ class LiquidGen(nn.Module):
         """
         Predict velocity field for flow matching.
         Args:
-            x: [B, C, H, W] noisy latent (C=16 for Flux VAE)
             t: [B] timestep in [0, 1]
             class_labels: [B] optional class labels
         Returns:
@@ -459,13 +459,15 @@ if __name__ == "__main__":
         model = factory(num_classes=27).to(device)
         print(f"LiquidGen-{name}: {model.count_params() / 1e6:.1f}M params")
-        x = torch.randn(2, 16, 32, 32, device=device)
         t = torch.rand(2, device=device)
         labels = torch.randint(0, 27, (2,), device=device)
         v = model(x, t, labels)
         assert v.shape == x.shape
-        x512 = torch.randn(1, 16, 64, 64, device=device)
         v512 = model(x512, t[:1], labels[:1])
         assert v512.shape == x512.shape
         print(f"  256px ✅ 512px ✅")

 LiquidGen: A Novel Liquid Neural Network Image Generation Model
 Architecture Overview:
+- Frozen VAE encoder/decoder (SDXL VAE, 4ch latent, 8x compression, no login needed)
 - Liquid backbone for denoising (fully parallelizable, no attention, no sequential ODE)
 - Flow matching training objective (velocity prediction)
     def __init__(
         self,
+        in_channels: int = 4,         # 4 for SDXL VAE
         patch_size: int = 2,
         embed_dim: int = 512,
         depth: int = 16,
         """
         Predict velocity field for flow matching.
         Args:
+            x: [B, C, H, W] noisy latent (C=4 for SDXL VAE)
             t: [B] timestep in [0, 1]
             class_labels: [B] optional class labels
         Returns:
         model = factory(num_classes=27).to(device)
         print(f"LiquidGen-{name}: {model.count_params() / 1e6:.1f}M params")
+        # 256px: image/8 = 32x32 latent, 4 channels (SDXL VAE)
+        x = torch.randn(2, 4, 32, 32, device=device)
         t = torch.rand(2, device=device)
         labels = torch.randint(0, 27, (2,), device=device)
         v = model(x, t, labels)
         assert v.shape == x.shape
+        # 512px: image/8 = 64x64 latent
+        x512 = torch.randn(1, 4, 64, 64, device=device)
         v512 = model(x512, t[:1], labels[:1])
         assert v512.shape == x512.shape
         print(f"  256px ✅ 512px ✅")