asdf98 commited on
Commit
7e03082
Β·
verified Β·
1 Parent(s): 3afddfb

Upload train_stage1.py

Browse files
Files changed (1) hide show
  1. train_stage1.py +35 -0
train_stage1.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ LuminaRS Stage 1: Core Flow-Matching Training
3
+ Trains the denoiser on art/illustration data with flow matching.
4
+ Colab A100 compatible. Uses frozen pretrained VAE + CLIP.
5
+ """
6
+ import os, math, torch, torch.nn.functional as F
7
+ from torch.utils.data import DataLoader
8
+ from datasets import load_dataset
9
+ from torchvision import transforms
10
+ from transformers import CLIPTextModel, CLIPTokenizer
11
+ from diffusers import AutoencoderKL
12
+ from luminars.model import LuminaRS
13
+ from luminars.config import LuminaRSConfig
14
+
15
# ── Flow Matching Loss ──────────────────────────────────────────────────
def flow_matching_loss(model, vae, clip, z0, text_emb):
    """Optimal-transport flow-matching loss on clean latents.

    Samples a uniform time t and Gaussian noise x0, forms the straight-line
    interpolant x_t = (1 - t) * x0 + t * x1 with x1 = z0, and regresses the
    model's predicted velocity against the constant target v = x1 - x0.

    Args:
        model: denoiser called as ``model(x_t, text_emb, t)`` -> predicted velocity.
        vae: unused here; kept for signature parity with the pipeline wrapper.
        clip: unused here; kept for signature parity with the pipeline wrapper.
        z0: clean latent batch; indexing assumes shape (B, C, H, W).
        text_emb: text conditioning, passed through to ``model`` unchanged.

    Returns:
        Scalar MSE loss between predicted and target velocity.

    NOTE(review): a later module-level ``def`` in this file reuses this
    function's name and shadows it — confirm intended naming with the author.
    """
    B = z0.shape[0]
    t = torch.rand(B, device=z0.device)
    x1 = z0  # clean latent (data endpoint of the flow)
    # BUG FIX: original read `torch.randn_like(z1)` — `z1` is undefined
    # (NameError). The noise endpoint must match the data tensor's shape.
    x0 = torch.randn_like(x1)  # noise endpoint
    # Linear interpolation along the straight noise→data path.
    xt = (1 - t[:, None, None, None]) * x0 + t[:, None, None, None] * x1
    # Target velocity is constant along the straight line.
    v_target = x1 - x0
    v_pred = model(xt, text_emb, t)
    return F.mse_loss(v_pred, v_target)
28
+
29
def flow_matching_loss(model, vae, clip, pixel_images, text_tokens):
    """Full pipeline: pixels -> frozen VAE latents -> flow-matching loss.

    Encodes images with the (frozen) VAE, scales latents by the VAE's
    configured scaling factor, embeds text with CLIP, then computes the
    optimal-transport flow-matching MSE on the latents.

    BUG FIX(review): this ``def`` reuses the name of the latent-space loss
    defined earlier in the file, shadowing it at module level — the original
    tail call ``flow_matching_loss(model, vae, clip, latents, text_emb)``
    therefore recursed into *itself* forever (RecursionError). The
    flow-matching step is inlined below so this function is self-contained.

    Args:
        model: denoiser called as ``model(x_t, text_emb, t)`` -> predicted velocity.
        vae: frozen autoencoder exposing ``encode(...).latent_dist.sample()``
            and ``config.scaling_factor``.
        clip: frozen text encoder; ``clip(text_tokens).last_hidden_state``
            is used as conditioning.
        pixel_images: image batch fed to ``vae.encode``.
        text_tokens: tokenized prompts fed to ``clip``.

    Returns:
        Tuple ``(loss, latents, text_emb)`` — scalar MSE loss, the scaled
        latents, and the CLIP hidden states.
    """
    # Encoders are frozen: no gradients through VAE or CLIP.
    with torch.no_grad():
        latents = vae.encode(pixel_images).latent_dist.sample()
        latents = latents * vae.config.scaling_factor
        text_emb = clip(text_tokens).last_hidden_state
    # Inlined optimal-transport flow matching (was a recursive self-call).
    B = latents.shape[0]
    t = torch.rand(B, device=latents.device)
    x1 = latents                 # clean latent (data endpoint)
    x0 = torch.randn_like(x1)    # noise endpoint
    xt = (1 - t[:, None, None, None]) * x0 + t[:, None, None, None] * x1
    v_target = x1 - x0           # constant straight-line velocity
    v_pred = model(xt, text_emb, t)
    loss = F.mse_loss(v_pred, v_target)
    return loss, latents, text_emb