asdf98
/

LuminaRS

Model card Files Files and versions

xet

Community

asdf98 committed 27 days ago

Commit

3afddfb

verified ·

1 Parent(s): 359afd9

Delete luminars/ssm.py

Browse files

Files changed (1) hide show

luminars/ssm.py +0 -113

luminars/ssm.py DELETED Viewed

@@ -1,113 +0,0 @@
-"""
-Spatial Recurrent Block (SRB) -- inspired by RWKV + VMamba-UNet.
-Uses depthwise conv for spatial token-shift and channel-wise decay mixing.
-Pure PyTorch, no heavy deps.
-"""
-import math
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-def rmsnorm(x, eps=1e-6):
-    """
-    Functional RMS normalization over the last dimension (no learnable gain).
-    x: (..., dim) floating-point tensor
-    eps: added to the mean of squares before the reciprocal square root
-    Returns: x * rsqrt(mean(x ** 2, dim=-1) + eps), same shape as x
-    """
-    # Mean of squares, not square of the mean: the latter is ~0 for zero-mean
-    # features and would scale the output by roughly 1 / sqrt(eps).
-    mean_sq = x.pow(2).mean(dim=-1, keepdim=True)
-    return x * torch.rsqrt(mean_sq + eps)
-class RMSNorm(nn.Module):
-    """
-    RMS normalization over the last dimension with a learnable per-feature gain.
-    """
-    def __init__(self, dim, eps=1e-6):
-        super().__init__()
-        self.gamma = nn.Parameter(torch.ones(dim))
-        self.eps = eps
-    def forward(self, x):
-        """
-        x: (..., dim)
-        Returns: gamma * x / (rms(x) + eps), same shape as x
-        """
-        feature_count = x.shape[-1]
-        # L2 norm divided by sqrt(dim) equals the root-mean-square of the features.
-        rms = x.norm(2, dim=-1, keepdim=True) / math.sqrt(feature_count)
-        scaled = self.gamma * x
-        # eps is added to the RMS itself, i.e. outside the square root.
-        return scaled / (rms + self.eps)
-class SpatialRecurrentBlock(nn.Module):
-    """
-    Unfinished prototype of a spatial recurrent block.
-    Intended design (per the original author's notes):
-    1. Token-shift spatially with a 3x3 depthwise conv (spatial mixing)
-    2. Apply channel-wise decay-mixing (RWKV time-mix equivalent)
-    3. Return a residual output
-    Current state of the code:
-    - forward() is incomplete: it ends in `pass`, so it never returns a tensor.
-    - forward() calls rearrange_for_ssm, and __init__ uses DropPath when
-      drop_path > 0; neither name is defined or imported in the visible part
-      of this file.
-    - spatial_norm, state_out, D, mlp and drop_path are built in __init__ but
-      are not used by forward().
-    """
-    def __init__(self, dim, d_state=64, drop_path=0.0):
-        """
-        dim: channel count used by the conv, the norms and the linear layers.
-        d_state: state size; x_proj_in emits 2 * d_state + 1 values per token and
-            x_proj_A has d_state entries.
-        drop_path: when > 0.0 it is passed to DropPath, otherwise nn.Identity is used.
-        """
-        super().__init__()
-        self.dim = dim
-        self.d_state = d_state
-        # Spatial token shift (depthwise 3x3 conv)
-        self.spatial_conv = nn.Conv2d(dim, dim, kernel_size=3, padding=1, groups=dim)
-        # NOTE(review): spatial_norm is never applied in forward().
-        self.spatial_norm = RMSNorm(dim)
-        # Input-dependent selective projections
-        # forward() splits this output into d_state, d_state and 1 columns.
-        self.x_proj_in = nn.Linear(dim, d_state * 2 + 1, bias=False)  # [B, C, decay]
-        # Learnable vector of length d_state; forward() maps it through -exp(.) to get A.
-        self.x_proj_A  = nn.Parameter(torch.arange(d_state).float() * -math.log(10000) / d_state)  # S4D init
-        # State-to-output
-        self.state_out = nn.Linear(d_state, dim, bias=False)
-        self.D = nn.Parameter(torch.ones(dim) * 1.0)  # skip
-        # Post-MLP
-        self.mlp = nn.Sequential(
-            RMSNorm(dim),
-            nn.Linear(dim, dim * 2),
-            nn.GELU(),
-            nn.Linear(dim * 2, dim),
-        )
-        # Drop path (stochastic depth)
-        # NOTE(review): DropPath is not defined in the visible part of this file.
-        self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
-    def forward(self, x):
-        """
-        x: (B, C, H, W)
-        Intended to return (B, C, H, W); as written the method falls through to
-        `pass` and produces no return value.
-        """
-        B, C, H, W = x.shape
-        shortcut = x  # not referenced again in this method
-        # --- SPATIAL TOKEN SHIFT ---
-        x_shift = self.spatial_conv(x)  # (B, C, H, W)
-        # Flatten to sequence for SSM: (B*H*W, C)
-        # NOTE(review): rearrange_for_ssm is not defined in the visible part of this file.
-        x_flat = rearrange_for_ssm(x_shift)  # (BHW, C)
-        # --- SELECTIVE STATE SPACE (MAMBA-style) ---
-        # Per-token selectivity
-        params = self.x_proj_in(x_flat)  # (BHW, d_state*2 + 1)
-        B_param, C_param, delta_log = params.split([self.d_state, self.d_state, 1], dim=-1)
-        delta = F.softplus(delta_log.squeeze(-1))  # (BHW,)
-        # Discretize A
-        A = -torch.exp(self.x_proj_A)  # negative for stability
-        A_bar = torch.exp(delta.unsqueeze(-1) * A)  # (BHW, d_state)
-        # Input-to-state
-        # NOTE(review): B_param is (BHW, d_state) while x_flat is annotated (BHW, C);
-        # this product only broadcasts if those widths are compatible -- confirm.
-        Bx = B_param * x_flat  # (BHW, d_state)
-        # RECURRENT SCAN (vectorized over batch)
-        state = torch.zeros(B * H * W, self.d_state, device=x.device, dtype=x.dtype)
-        states = []  # never appended to
-        for t in range(C):  # scan along channel dim (like token dim)
-            # The update does not index anything by t, so every step applies the same formula.
-            state = A_bar * state + Bx.unsqueeze(1)  # broadcasting issue
-            # NO -- need to redesign. This is wrong.
-            pass
-        # Actually, the canonical approach for vision: treat spatial positions as tokens.
-        # Each pixel = one token. Scan in raster order, or better: bidirectional scan.
-        # BUT for a 32x32 image that's 1024 tokens. Scanning in PyTorch sequentially is SLOW.
-        # SOLUTION: Use a DIFFERENT architecture altogether.
-        # Instead of token-scanning SSM, use RWKV's time-mixing formula generalized to 2D:
-        # y_i = sigmoid(gate_i) * (decay_i * prev_i + (1-decay_i) * x_i)
-        # where prev_i is previous token mixed spatially via depthwise conv.
-        #
-        # This avoids seq scan: all operations are parallel.
-        # REWRITE:
-        # NOTE(review): no rewrite follows in the visible file; the method ends here.
-        pass