Fix geomloss tensor shape bug for images + optimize pool sampling

- compute_velocity now flattens image tensors (N,C,H,W) → (N,D) before the geomloss calls
and reshapes the gradients back to the original shape afterwards. Fixes the MNIST/CIFAR crash.
- compute_sinkhorn_divergence also handles image tensors.
- TrajectoryPool.finalize() pre-concatenates the pool tensors once after pool building, so
sample() only does random indexing (O(1) in the pool size) instead of an O(pool_size) torch.cat per step.

Files changed (1) hide show

sinkhorn_flow.py +83 -12

sinkhorn_flow.py CHANGED Viewed

@@ -22,6 +22,10 @@ class SinkhornPotentialComputer:
     The velocity field of the Sinkhorn WGF is (Theorem 1):
         v(x) = ∇f_{μ,μ}(x) - ∇f_{μ,μ*}(x)
     Args:
         blur: GeomLoss blur parameter (related to ε: ε = blur^p).
         scaling: Multiscale scaling parameter for Sinkhorn iterations.
@@ -45,31 +49,57 @@ class SinkhornPotentialComputer:
             backend=backend, potentials=False,
         )
     def compute_velocity(self, X: torch.Tensor, Y: torch.Tensor) -> torch.Tensor:
         """Compute the Sinkhorn WGF velocity field at particles X.
         v(X_i) = ∇f_{μ,μ}(X_i) - ∇f_{μ,μ*}(X_i)
         """
-        X_grad = X.detach().clone().requires_grad_(True)
-        Y_det = Y.detach()
-        F_self, _ = self.loss_fn(X_grad, X_grad.detach().clone())
         grad_self = torch.autograd.grad(
             F_self.sum(), X_grad, create_graph=False, retain_graph=False
         )[0]
-        X_grad2 = X.detach().clone().requires_grad_(True)
-        F_cross, _ = self.loss_fn(X_grad2, Y_det)
         grad_cross = torch.autograd.grad(
             F_cross.sum(), X_grad2, create_graph=False, retain_graph=False
         )[0]
         velocity = grad_self.detach() - grad_cross.detach()
         return velocity
     def compute_sinkhorn_divergence(self, X: torch.Tensor, Y: torch.Tensor) -> float:
         with torch.no_grad():
-            return self.loss_monitor(X, Y).item()
 class SinkhornGradientFlow:
@@ -109,7 +139,11 @@ class SinkhornGradientFlow:
 class TrajectoryPool:
-    """Stores (x, v, t) tuples from Sinkhorn gradient flow trajectories."""
     def __init__(self, max_size: int = 1_000_000):
         self.max_size = max_size
@@ -117,8 +151,15 @@ class TrajectoryPool:
         self.v_pool: List[torch.Tensor] = []
         self.t_pool: List[int] = []
         self._size = 0
     def add_trajectory(self, trajectory: List[Tuple[torch.Tensor, torch.Tensor, int]]):
         for x, v, t in trajectory:
             n = x.shape[0]
             if self._size + n > self.max_size:
@@ -147,13 +188,43 @@ class TrajectoryPool:
                 self._size -= (batch_size - keep)
                 removed = n
     def sample(self, batch_size: int, device: str = "cpu"
                ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
-        all_x = torch.cat(self.x_pool, dim=0)
-        all_v = torch.cat(self.v_pool, dim=0)
-        all_t = torch.tensor(self.t_pool, dtype=torch.float32)
-        idx = torch.randint(0, all_x.shape[0], (batch_size,))
-        return all_x[idx].to(device), all_v[idx].to(device), all_t[idx].to(device)
     @property
     def size(self) -> int:

     The velocity field of the Sinkhorn WGF is (Theorem 1):
         v(x) = ∇f_{μ,μ}(x) - ∇f_{μ,μ*}(x)
+    IMPORTANT: GeomLoss SamplesLoss requires inputs as (N, D) or (B, N, D) tensors.
+    For image data (N, C, H, W), we flatten to (N, C*H*W) before calling geomloss,
+    then reshape gradients back to (N, C, H, W).
     Args:
         blur: GeomLoss blur parameter (related to ε: ε = blur^p).
         scaling: Multiscale scaling parameter for Sinkhorn iterations.
             backend=backend, potentials=False,
         )
+    def _flatten_if_image(self, X: torch.Tensor) -> Tuple[torch.Tensor, bool, torch.Size]:
+        """Flatten (N,C,H,W) → (N,D) for geomloss. Returns (flat_tensor, was_image, original_shape)."""
+        original_shape = X.shape
+        if X.dim() == 4:
+            return X.view(X.shape[0], -1), True, original_shape
+        return X, False, original_shape
     def compute_velocity(self, X: torch.Tensor, Y: torch.Tensor) -> torch.Tensor:
         """Compute the Sinkhorn WGF velocity field at particles X.
         v(X_i) = ∇f_{μ,μ}(X_i) - ∇f_{μ,μ*}(X_i)
+        Handles both 2D point clouds (N,D) and images (N,C,H,W) by
+        flattening images before geomloss calls.
         """
+        original_shape = X.shape
+        # Flatten if image tensors
+        X_flat, is_image, _ = self._flatten_if_image(X.detach().clone())
+        Y_flat, _, _ = self._flatten_if_image(Y.detach())
+        # --- Self-potential: ∇f_{μ,μ}(X) ---
+        X_grad = X_flat.requires_grad_(True)
+        X_self_detached = X_flat.detach().clone()
+        F_self, _ = self.loss_fn(X_grad, X_self_detached)
         grad_self = torch.autograd.grad(
             F_self.sum(), X_grad, create_graph=False, retain_graph=False
         )[0]
+        # --- Cross-potential: ∇f_{μ,μ*}(X) ---
+        X_grad2 = X_flat.detach().clone().requires_grad_(True)
+        F_cross, _ = self.loss_fn(X_grad2, Y_flat)
         grad_cross = torch.autograd.grad(
             F_cross.sum(), X_grad2, create_graph=False, retain_graph=False
         )[0]
+        # Velocity = ∇f_{μ,μ} - ∇f_{μ,μ*}
         velocity = grad_self.detach() - grad_cross.detach()
+        # Reshape back to original shape if image
+        if is_image:
+            velocity = velocity.view(original_shape)
         return velocity
     def compute_sinkhorn_divergence(self, X: torch.Tensor, Y: torch.Tensor) -> float:
+        """Compute Sinkhorn divergence S_ε(μ, μ*). Handles image tensors."""
         with torch.no_grad():
+            X_flat, _, _ = self._flatten_if_image(X)
+            Y_flat, _, _ = self._flatten_if_image(Y)
+            return self.loss_monitor(X_flat, Y_flat).item()
 class SinkhornGradientFlow:
 class TrajectoryPool:
+    """Stores (x, v, t) tuples from Sinkhorn gradient flow trajectories.
+    After building, call finalize() to pre-concatenate tensors for O(1) sampling.
+    Without finalize(), sampling is O(pool_size) per call due to torch.cat.
+    """
     def __init__(self, max_size: int = 1_000_000):
         self.max_size = max_size
         self.v_pool: List[torch.Tensor] = []
         self.t_pool: List[int] = []
         self._size = 0
+        self._finalized = False
+        self._all_x = None
+        self._all_v = None
+        self._all_t = None
     def add_trajectory(self, trajectory: List[Tuple[torch.Tensor, torch.Tensor, int]]):
+        """Add (x, v, t) entries from a flow trajectory. Call before finalize()."""
+        if self._finalized:
+            raise RuntimeError("Cannot add to a finalized pool. Create a new pool.")
         for x, v, t in trajectory:
             n = x.shape[0]
             if self._size + n > self.max_size:
                 self._size -= (batch_size - keep)
                 removed = n
+    def finalize(self):
+        """Pre-concatenate all pool data for fast O(1) sampling.
+        Call this once after all trajectories have been added.
+        After finalization, sample() is fast (just random indexing).
+        """
+        if self._size == 0:
+            raise RuntimeError("Cannot finalize an empty pool.")
+        self._all_x = torch.cat(self.x_pool, dim=0)
+        self._all_v = torch.cat(self.v_pool, dim=0)
+        self._all_t = torch.tensor(self.t_pool, dtype=torch.float32)
+        # Free the lists to save memory
+        self.x_pool = None
+        self.v_pool = None
+        self.t_pool = None
+        self._finalized = True
     def sample(self, batch_size: int, device: str = "cpu"
                ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        """Sample a random batch from the pool.
+        If finalize() was called, this is O(1). Otherwise falls back to O(pool_size).
+        """
+        if self._finalized:
+            idx = torch.randint(0, self._all_x.shape[0], (batch_size,))
+            return (
+                self._all_x[idx].to(device),
+                self._all_v[idx].to(device),
+                self._all_t[idx].to(device),
+            )
+        else:
+            # Fallback: concatenate on the fly (slow for large pools)
+            all_x = torch.cat(self.x_pool, dim=0)
+            all_v = torch.cat(self.v_pool, dim=0)
+            all_t = torch.tensor(self.t_pool, dtype=torch.float32)
+            idx = torch.randint(0, all_x.shape[0], (batch_size,))
+            return all_x[idx].to(device), all_v[idx].to(device), all_t[idx].to(device)
     @property
     def size(self) -> int: