Create cell_7_conduit_sweep_external_svd_correctly_applied.py

Browse files

Files changed (1) hide show

cell_7_conduit_sweep_external_svd_correctly_applied.py +380 -0

cell_7_conduit_sweep_external_svd_correctly_applied.py ADDED Viewed

	@@ -0,0 +1,380 @@

+"""
+Cell 7 — Fresnel Conduit Sweep (CORRECT)
+==========================================
+Uses solver='conduit' to capture telemetry from THE REAL
+decomposition inside the forward pass. No circular reconstruction.
+The friction, settle, and extraction order come from the actual
+Gram matrices the encoder produces, decomposed by FLEighConduit
+as the model runs.
+9 configurations through conv on 16×16 spatial grids.
+No pooling. No flattening. Respects geometric structure.
+Channels:
+  S_orig:   4ch — raw eigenvalues (pre-cross-attention)
+  S_coord:  4ch — coordinated eigenvalues (post-cross-attention)
+  Friction: 4ch — log1p(friction) from real decomposition
+  Settle:   4ch — convergence iterations from real decomposition
+  Error:    1ch — per-patch reconstruction MSE
+"""
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+import time
+from tqdm import tqdm
+# Every model and classifier tensor in this cell is placed on this device.
+# 'cuda' is hard-coded; there is no CPU fallback.
+device = torch.device('cuda')
+# ═══════════════════════════════════════════════════════════════
+# LOAD FRESNEL WITH CONDUIT SOLVER
+# ═══════════════════════════════════════════════════════════════
+# Checkpoint identifier handed to load_model(hf_version=...).
+FRESNEL_VERSION = 'v50_fresnel_64'
+# Image side length: used for the resize transform, the dummy batch and the
+# patch-grid computation below.
+IMG_SIZE = 64
+print(f"Loading Fresnel ({FRESNEL_VERSION}) with conduit solver...")
+from geolip_svae import load_model
+from geolip_svae.model import extract_patches
+import torchvision
+import torchvision.transforms as T
+# load_model returns the model plus a second value bound to cfg; cfg is not
+# read anywhere in the visible code.
+fresnel, cfg = load_model(hf_version=FRESNEL_VERSION, device=device)
+fresnel.eval()
+fresnel.solver = 'conduit'  # Enable conduit — telemetry from real decomposition
+# Patch-grid geometry: gh x gw patches per image, N patches in total.
+ps = fresnel.patch_size
+gh, gw = IMG_SIZE // ps, IMG_SIZE // ps
+# NOTE(review): D is presumably the per-patch spectrum length (the module
+# docstring speaks of 4 channels per signal) — confirm against the model.
+D = fresnel.D
+N = gh * gw
+print(f"  Patch size: {ps}, Grid: {gh}x{gw}, D={D}, Patches/image: {N}")
+print(f"  Solver: {fresnel.solver}")
+# Verify conduit works
+with torch.no_grad():
+    dummy = torch.randn(2, 3, IMG_SIZE, IMG_SIZE, device=device)
+    out = fresnel(dummy)
+    packet = fresnel.last_conduit_packet
+    assert packet is not None, "Conduit packet is None — solver not active"
+    print(f"  Conduit packet shape: friction={packet.friction.shape}")
+    print(f"  Expected: ({2 * N}, {D}) = ({2*N}, {D})")
+    print(f"  CONDUIT ACTIVE ✓")
+# Load CIFAR-10
+# Images are resized to IMG_SIZE and converted to tensors; no normalisation
+# or augmentation is applied at this stage.
+transform = T.Compose([T.Resize(IMG_SIZE), T.ToTensor()])
+# Both splits are downloaded on demand into /content/data.
+cifar_train = torchvision.datasets.CIFAR10(
+    root='/content/data', train=True, download=True, transform=transform)
+cifar_test = torchvision.datasets.CIFAR10(
+    root='/content/data', train=False, download=True, transform=transform)
+# shuffle=False on both loaders: they only feed the one-off feature
+# extraction pass, so sample order stays fixed.
+train_loader = torch.utils.data.DataLoader(
+    cifar_train, batch_size=128, shuffle=False, num_workers=4)
+test_loader = torch.utils.data.DataLoader(
+    cifar_test, batch_size=128, shuffle=False, num_workers=4)
+# Display names indexed by integer class label (some are abbreviated).
+CLASSES = ['airplane', 'auto', 'bird', 'cat', 'deer',
+           'dog', 'frog', 'horse', 'ship', 'truck']
+# ═══════════════════════════════════════════════════════════════
+# PRECOMPUTE — Extract real conduit telemetry from forward pass
+# ═══════════════════════════════════════════════════════════════
+def extract_all(loader, desc="Extracting"):
+    """Run Fresnel with conduit solver, capture everything."""
+    all_s_orig = []
+    all_s_coord = []
+    all_friction = []
+    all_settle = []
+    all_error = []
+    all_labels = []
+    for images, labels in tqdm(loader, desc=desc):
+        with torch.no_grad():
+            images_gpu = images.to(device)
+            # Forward pass — conduit captures from real decomposition
+            out = fresnel(images_gpu)
+            packet = fresnel.last_conduit_packet
+            B = images_gpu.shape[0]
+            # SVD outputs
+            S_orig = out['svd']['S_orig']   # (B, N, D) raw eigenvalues
+            S_coord = out['svd']['S']       # (B, N, D) cross-attention coordinated
+            # Conduit telemetry from the REAL decomposition
+            friction = packet.friction.reshape(B, N, D)     # (B, N, D)
+            settle = packet.settle.reshape(B, N, D)         # (B, N, D)
+            # Per-patch reconstruction error
+            recon = out['recon']
+            inp_p, _, _ = extract_patches(images_gpu, ps)
+            rec_p, _, _ = extract_patches(recon, ps)
+            patch_mse = (inp_p - rec_p).pow(2).mean(dim=-1)  # (B, N)
+            # Reshape to spatial grids and move to CPU
+            all_s_orig.append(S_orig.reshape(B, gh, gw, D).cpu())
+            all_s_coord.append(S_coord.reshape(B, gh, gw, D).cpu())
+            all_friction.append(friction.reshape(B, gh, gw, D).cpu())
+            all_settle.append(settle.reshape(B, gh, gw, D).cpu())
+            all_error.append(patch_mse.reshape(B, gh, gw, 1).cpu())
+            all_labels.append(labels)
+    return {
+        'S_orig': torch.cat(all_s_orig),       # (N, gh, gw, 4)
+        'S_coord': torch.cat(all_s_coord),     # (N, gh, gw, 4)
+        'friction': torch.cat(all_friction),    # (N, gh, gw, 4)
+        'settle': torch.cat(all_settle),        # (N, gh, gw, 4)
+        'error': torch.cat(all_error),          # (N, gh, gw, 1)
+        'labels': torch.cat(all_labels),        # (N,)
+    }
+# One forward pass over each split; the resulting feature dicts are kept in
+# memory and reused by every classifier configuration trained below.
+print("\nPrecomputing train set (real conduit telemetry)...")
+train_data = extract_all(train_loader, "Train")
+print(f"  Train: {len(train_data['labels'])} images")
+print("Precomputing test set...")
+test_data = extract_all(test_loader, "Test")
+print(f"  Test: {len(test_data['labels'])} images")
+# ═══════════════════════════════════════════════════════════════
+# SIGNAL PROFILE — What does the real conduit data look like?
+# ═══════════════════════════════════════════════════════════════
+print(f"\n{'=' * 70}")
+print("  SIGNAL PROFILE — Real conduit telemetry from Fresnel")
+print("=" * 70)
+# Global summary statistics of each extracted signal over the train split.
+for key in ['S_orig', 'S_coord', 'friction', 'settle', 'error']:
+    t = train_data[key]
+    flat = t.reshape(t.shape[0], -1)
+    print(f"  {key:10s}: mean={flat.mean():12.4f} std={flat.std():12.4f} "
+          f"min={flat.min():12.4f} max={flat.max():12.2f}")
+# Log-friction profile
+# Same statistics after log1p, the transform ConduitDataset applies to friction.
+log_fric = torch.log1p(train_data['friction'])
+flat_lf = log_fric.reshape(log_fric.shape[0], -1)
+print(f"  {'log1p_fric':10s}: mean={flat_lf.mean():12.4f} std={flat_lf.std():12.4f} "
+      f"min={flat_lf.min():12.4f} max={flat_lf.max():12.2f}")
+# Per-class friction means
+print(f"\n  Per-class mean friction (raw):")
+# range(10) matches the ten entries of CLASSES.
+for c in range(10):
+    mask = train_data['labels'] == c
+    fm = train_data['friction'][mask].mean().item()
+    lm = torch.log1p(train_data['friction'][mask]).mean().item()
+    print(f"    {CLASSES[c]:<10s}: raw={fm:10.2f}  log1p={lm:6.3f}")
+# Spatial CV of each signal
+# NOTE(review): the label says 16x16, but the grid is gh x gw as computed
+# from the model's patch size — confirm they agree.
+print(f"\n  Spatial CV (per-image std/mean across 16x16 grid):")
+for key in ['S_orig', 'friction', 'settle', 'error']:
+    t = train_data[key]
+    # NOTE(review): each image is flattened over grid cells AND channels, so
+    # this std/mean also includes between-channel variation, not only spatial.
+    per_img = t.reshape(t.shape[0], -1)
+    # The small epsilon guards against division by a zero mean.
+    cvs = per_img.std(dim=1) / (per_img.mean(dim=1).abs() + 1e-8)
+    print(f"    {key:10s}: mean_CV={cvs.mean():.4f}  median_CV={cvs.median():.4f}")
+# ═══════════════════════════════════════════════════════════════
+# CONV CLASSIFIER
+# ═══════════════════════════════════════════════════════════════
+class SpatialConvClassifier(nn.Module):
+    def __init__(self, in_channels, n_classes=10):
+        super().__init__()
+        self.conv = nn.Sequential(
+            nn.Conv2d(in_channels, 64, 3, stride=2, padding=1),   # 16→8
+            nn.GELU(),
+            nn.Conv2d(64, 128, 3, stride=2, padding=1),           # 8→4
+            nn.GELU(),
+            nn.Conv2d(128, 128, 3, stride=1, padding=1),          # 4→4
+            nn.GELU(),
+            nn.AdaptiveAvgPool2d(1),                               # 4→1
+        )
+        self.head = nn.Sequential(
+            nn.Linear(128, 64),
+            nn.GELU(),
+            nn.Linear(64, n_classes),
+        )
+    def forward(self, x):
+        h = self.conv(x).squeeze(-1).squeeze(-1)
+        return self.head(h)
+class ConduitDataset(torch.utils.data.Dataset):
+    def __init__(self, data, channels='O', augment=False):
+        """
+        Channel codes:
+          O = S_orig (raw eigenvalues, 4ch)
+          C = S_coord (cross-attention coordinated, 4ch)
+          F = friction (log1p transformed, 4ch)
+          T = settle (raw, 4ch)
+          E = error (per-patch recon MSE, 1ch)
+        """
+        self.labels = data['labels']
+        self.augment = augment
+        parts = []
+        if 'O' in channels:
+            parts.append(data['S_orig'].permute(0, 3, 1, 2))
+        if 'C' in channels:
+            parts.append(data['S_coord'].permute(0, 3, 1, 2))
+        if 'F' in channels:
+            # Log-compress friction: [4, 25M] → [1.7, 17]
+            parts.append(torch.log1p(data['friction']).permute(0, 3, 1, 2))
+        if 'T' in channels:
+            parts.append(data['settle'].permute(0, 3, 1, 2))
+        if 'E' in channels:
+            parts.append(data['error'].permute(0, 3, 1, 2))
+        self.maps = torch.cat(parts, dim=1)
+        self.n_channels = self.maps.shape[1]
+    def __len__(self):
+        return len(self.labels)
+    def __getitem__(self, idx):
+        x = self.maps[idx]
+        if self.augment and torch.rand(1).item() > 0.5:
+            x = x.flip(-1)
+        return x, self.labels[idx]
+# ═══════════════════════════════════════════════════════════════
+# TRAINING
+# ═══════════════════════════════════════════════════════════════
+def train_and_eval(channels, name, epochs=30, batch_size=128, lr=3e-4):
+    train_ds = ConduitDataset(train_data, channels, augment=True)
+    test_ds = ConduitDataset(test_data, channels, augment=False)
+    n_ch = train_ds.n_channels
+    tr_loader = torch.utils.data.DataLoader(
+        train_ds, batch_size=batch_size, shuffle=True,
+        num_workers=4, pin_memory=True, drop_last=True)
+    te_loader = torch.utils.data.DataLoader(
+        test_ds, batch_size=batch_size, shuffle=False,
+        num_workers=4, pin_memory=True)
+    model = SpatialConvClassifier(n_ch, 10).to(device)
+    n_params = sum(p.numel() for p in model.parameters())
+    opt = torch.optim.Adam(model.parameters(), lr=lr)
+    sched = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=epochs)
+    best_acc = 0
+    t0 = time.time()
+    for epoch in range(1, epochs + 1):
+        model.train()
+        correct, total = 0, 0
+        for x, y in tr_loader:
+            x, y = x.to(device), y.to(device)
+            logits = model(x)
+            loss = F.cross_entropy(logits, y)
+            opt.zero_grad()
+            loss.backward()
+            opt.step()
+            correct += (logits.argmax(-1) == y).sum().item()
+            total += len(y)
+        sched.step()
+        train_acc = correct / total
+        model.eval()
+        tc, tt = 0, 0
+        pcc = torch.zeros(10)
+        pct = torch.zeros(10)
+        with torch.no_grad():
+            for x, y in te_loader:
+                x, y = x.to(device), y.to(device)
+                preds = model(x).argmax(-1)
+                tc += (preds == y).sum().item()
+                tt += len(y)
+                for c in range(10):
+                    m = y == c
+                    pcc[c] += (preds[m] == y[m]).sum().item()
+                    pct[c] += m.sum().item()
+        test_acc = tc / tt
+        if test_acc > best_acc:
+            best_acc = test_acc
+        if epoch % 5 == 0 or epoch == epochs:
+            print(f"    ep{epoch:3d} train={train_acc:.1%} test={test_acc:.1%}")
+    elapsed = time.time() - t0
+    pca = pcc / (pct + 1e-8)
+    print(f"\n  {name}")
+    print(f"  Channels: {n_ch}, Params: {n_params:,}, Time: {elapsed:.0f}s")
+    print(f"  Best test: {best_acc:.1%}")
+    print(f"\n    {'Class':<10s} {'Acc':>6s}")
+    print(f"    {'-' * 22}")
+    for c in range(10):
+        bar = '█' * int(pca[c] * 20)
+        print(f"    {CLASSES[c]:<10s} {pca[c]:5.1%} {bar}")
+    print()
+    return best_acc, n_params
+# ═══════════════════════════════════════════════════════════════
+# RUN ALL CONFIGURATIONS
+# ═══════════════════════════════════════════════════════════════
+print(f"\n{'=' * 70}")
+print("  FRESNEL CONDUIT — Spatial Conv Readout (Real Decomposition)")
+print("=" * 70)
+# channel-code string -> (best test accuracy, parameter count, display name)
+results = {}
+# (channel codes, display name) for each classifier to train. The codes are
+# interpreted by ConduitDataset; the channel counts in the names follow from
+# 4 channels each for O/C/F/T and 1 for E.
+configs = [
+    ('O',         "S_orig (raw eigenvalues) — 4ch"),
+    ('C',         "S_coord (cross-attn coordinated) — 4ch"),
+    ('F',         "Friction (log1p, real decomp) — 4ch"),
+    ('E',         "Release error only — 1ch"),
+    ('T',         "Settle only — 4ch"),
+    ('OF',        "S_orig + Friction — 8ch"),
+    ('OE',        "S_orig + Release — 5ch"),
+    ('OFE',       "S_orig + Friction + Release — 9ch"),
+    ('OFET',      "FULL CONDUIT — 13ch"),
+]
+# Train one classifier per configuration with the default hyperparameters.
+for channels, name in configs:
+    print(f"\n{'─' * 70}")
+    print(f"  Training: {name}")
+    print(f"{'─' * 70}")
+    acc, params = train_and_eval(channels, name)
+    results[channels] = (acc, params, name)
+# ═══════════════════════════════════════════════════════════════
+# SCOREBOARD
+# ═══════════════════════════════════════════════════════════════
+print(f"\n{'=' * 70}")
+print(f"  SCOREBOARD — Fresnel Conduit (Real Decomposition Telemetry)")
+print("=" * 70)
+print(f"\n  {'Configuration':<40s} {'Ch':>4s} {'Params':>10s} {'Test Acc':>9s}")
+print(f"  {'-' * 66}")
+print(f"  {'Chance':<40s} {'—':>4s} {'—':>10s} {'10.0%':>9s}")
+for channels, (acc, params, name) in sorted(results.items(), key=lambda x: x[1][0]):
+    ds = ConduitDataset.__new__(ConduitDataset)
+    n_ch = sum([4 if c in 'OCFT' else 1 for c in channels])
+    print(f"  {name:<40s} {n_ch:>4d} {params:>10,d} {acc:>8.1%}")
+print(f"\n  {'--- PREVIOUS (circular Gram, INVALID) ---'}")
+print(f"  {'Freckles friction conv (circular)':40s} {'4':>4s} {'232K':>10s} {'45.8%':>9s}")
+print(f"  {'Freckles S conv (circular)':40s} {'4':>4s} {'232K':>10s} {'20.9%':>9s}")
+o_acc = results.get('O', (0, 0, ''))[0]
+best_ch, (best_acc, _, best_name) = max(results.items(), key=lambda x: x[1][0])
+print(f"\n  S_orig only:   {o_acc:.1%}")
+print(f"  Best conduit:  {best_acc:.1%} ({best_name})")
+print(f"  Conduit lift:  {(best_acc - o_acc) * 100:+.1f}pp")
+print(f"\n  THIS IS THE REAL TEST.")
+print(f"  Friction from the actual decomposition, not reconstructed Gram matrices.")