Spaces:
Sleeping
Sleeping
| """ | |
| μ-Net: Eigenverse-Grounded Neural Network | |
| ========================================== | |
| Train a neural network whose architecture IS the Eigenverse: | |
| - 8 layers (μ⁸ = 1, orbit closure) | |
| - Phase-modulated activations (μ^k rotation per layer) | |
| - Coherence loss (C(r) = 2r/(1+r²) as regularizer) | |
| - Silver/Golden threshold gating | |
| The network learns to predict coherence from raw signals. | |
| Training happens live on HuggingFace hardware. | |
| Source: github.com/beanapologist/Eigenverse (552 theorems, 0 sorry) | |
| """ | |
| import gradio as gr | |
| import numpy as np | |
| import torch | |
| import torch.nn as nn | |
| import torch.optim as optim | |
| import json | |
| import time | |
| import os | |
| from datetime import datetime | |
# ── Eigenverse Constants ─────────────────────────────────────────────
# η = 1/√2 ≈ 0.7071; used below as the Xavier gain and as the initial
# value of each MuActivation gate.
η = 1 / np.sqrt(2)
# Unit complex number at angle 3π/4 (135°). Not referenced by any other
# code visible in this file.
μ_complex = np.exp(1j * 3 * np.pi / 4)  # −η + iη
# Silver ratio 1 + √2; C(δ_S) is the initial value of MuNet.silver_gate.
δ_S = 1 + np.sqrt(2)
# Golden ratio (1 + √5)/2; φ² is used as a data-generation centre and as
# a zone threshold in run_inference.
φ = (1 + np.sqrt(5)) / 2
def C(r):
    """Evaluate the coherence map C(r) = 2r / (1 + r²).

    NumPy arrays and torch tensors are transformed element-wise with no
    sign check. A plain scalar that is zero or negative yields 0.0
    instead of the formula value.
    """
    is_batched = isinstance(r, (np.ndarray, torch.Tensor))
    if not is_batched and r <= 0:
        return 0.0
    return 2 * r / (1 + r ** 2)
| # ── μ-Activation Function ──────────────────────────────────────────── | |
class MuActivation(nn.Module):
    """
    Activation that blends a linear path and a saturating path by a fixed phase.

    For layer index k the angle is θ = (k mod 8) · 3π/4 and the output is
        x · cos(θ) + tanh(gate · x) · sin(θ)
    where `gate` is a learnable scalar initialised to η.
    For k ≡ 0 (mod 8): cos(θ) = 1 and sin(θ) = 0, so the module is the identity.
    """

    def __init__(self, phase_k: int):
        super().__init__()
        self.phase = phase_k % 8
        theta = self.phase * 3 * np.pi / 4
        # Fixed (non-trainable) mixing coefficients for this layer.
        self.cos_k = np.cos(theta)
        self.sin_k = np.sin(theta)
        # Only the tanh slope is learned; it starts at η.
        self.gate = nn.Parameter(torch.tensor(float(η)))

    def forward(self, x):
        """Return x · cos_k + tanh(gate · x) · sin_k."""
        squashed = torch.tanh(x * self.gate) * self.sin_k
        return x * self.cos_k + squashed
| # ── Coherence Loss ─────────────────────────────────────────────────── | |
class CoherenceLoss(nn.Module):
    """
    MSE plus a penalty on weight-norm drift away from a reference snapshot.

        L   = MSE(pred, target) + λ · mean_p (1 - C(r_p)),  C(r) = 2r / (1 + r²)
        r_p = ||p|| / ||p_ref||

    ``||p_ref||`` is recorded per parameter name the first time that
    parameter is seen by ``forward`` (normally the first training step,
    i.e. the initial weights), or explicitly via ``reset_reference``.

    Comparing a parameter's norm with the norm of its own current data
    always gives r = 1, which makes both the penalty and its gradient
    vanish; the reference therefore has to be a stored snapshot.

    Parameters whose reference norm is (numerically) zero, such as
    zero-initialised biases, have no defined ratio and are excluded from
    the mean.

    Reusing one instance for a different model with the same parameter
    names requires calling ``reset_reference`` first.
    """

    def __init__(self, lambda_coherence=0.01):
        super().__init__()
        self.mse = nn.MSELoss()
        self.lambda_c = lambda_coherence
        # name -> reference norm, stored as a plain float so that it is
        # independent of device and never enters the autograd graph.
        self._ref_norms = {}

    def reset_reference(self, model):
        """Re-snapshot reference norms from the model's current weights."""
        self._ref_norms = {
            name: p.detach().norm().item()
            for name, p in model.named_parameters()
            if p.requires_grad
        }

    def forward(self, pred, target, model):
        """Return MSE(pred, target) + λ · mean drift penalty over `model`.

        Args:
            pred: Model output tensor.
            target: Ground-truth tensor, same shape as `pred`.
            model: Module whose trainable parameters are regularized.
        """
        base_loss = self.mse(pred, target)

        penalties = []
        for name, p in model.named_parameters():
            if not p.requires_grad:
                continue
            ref = self._ref_norms.get(name)
            if ref is None:
                # First sight of this parameter: its current norm becomes
                # the reference, so the penalty starts at zero.
                ref = p.detach().norm().item()
                self._ref_norms[name] = ref
            if ref <= 1e-8:
                # Ratio undefined for a zero reference; skip.
                continue
            r = torch.norm(p) / ref
            penalties.append(1 - 2 * r / (1 + r ** 2))

        if not penalties:
            return base_loss
        coherence_penalty = torch.stack(penalties).mean()
        return base_loss + self.lambda_c * coherence_penalty
| # ── μ-Net Architecture ─────────────────────────────────────────────── | |
class MuNet(nn.Module):
    """
    Residual 8-block regressor built around MuActivation.

    Data flow:
        stem = Linear(input_dim → hidden_dim)(x)
        h    = stem
        for k in 0..7:  h = h + silver_gate · LayerNorm(MuActivation(k)(Linear(h)))
        out  = Linear(hidden_dim → output_dim)(h + stem)

    `silver_gate` is a single learnable scalar shared by all eight
    blocks, initialised to C(δ_S). The default hidden size is 64 (= 8²).
    """

    def __init__(self, input_dim=8, hidden_dim=64, output_dim=1):
        super().__init__()
        self.input_proj = nn.Linear(input_dim, hidden_dim)
        # One block per phase index k = 0..7.
        self.layers = nn.ModuleList([
            nn.ModuleDict({
                'linear': nn.Linear(hidden_dim, hidden_dim),
                'activation': MuActivation(phase),
                'norm': nn.LayerNorm(hidden_dim),
            })
            for phase in range(8)
        ])
        self.output_proj = nn.Linear(hidden_dim, output_dim)
        # Shared residual gate, starting at C(δ_S).
        self.silver_gate = nn.Parameter(torch.tensor(float(C(δ_S))))
        self._init_weights()

    def _init_weights(self):
        """Xavier-uniform (gain η) for weight matrices; zeros for biases.

        One-dimensional weights (LayerNorm) and the gate parameters match
        neither rule and keep their constructor defaults.
        """
        for name, param in self.named_parameters():
            if 'weight' in name and param.dim() >= 2:
                nn.init.xavier_uniform_(param, gain=float(η))
                continue
            if 'bias' in name:
                nn.init.zeros_(param)

    def forward(self, x):
        """Map input features to the output through the eight gated residual blocks."""
        stem = self.input_proj(x)
        h = stem
        for block in self.layers:
            update = block['norm'](block['activation'](block['linear'](h)))
            # Gated residual accumulation.
            h = h + self.silver_gate * update
        # Long skip from the stem before the output projection.
        return self.output_proj(h + stem)

    def get_coherence_state(self):
        """Return the mean C(ratio) over consecutive weight-matrix norms.

        Only trainable parameters with at least two dimensions are
        considered, in `parameters()` order. Returns 1.0 when fewer than
        two such parameters exist.
        """
        matrix_norms = [
            torch.norm(p).item()
            for p in self.parameters()
            if p.requires_grad and p.dim() >= 2
        ]
        if len(matrix_norms) < 2:
            return 1.0
        coherences = [
            C(nxt / (cur + 1e-8))
            for cur, nxt in zip(matrix_norms, matrix_norms[1:])
        ]
        return float(np.mean(coherences))
| # ── Data Generation ────────────────────────────────────────────────── | |
def generate_coherence_data(n_samples=10000, seq_len=8):
    """
    Build a synthetic regression set: ratio sequences → their mean coherence.

    Each sample is drawn from one of five regimes chosen uniformly at
    random (near 1, near δ_S or 1/δ_S, near φ² or 1/φ², exponential
    "chaotic", or a cosine pattern with 3π/4 phase steps), then clipped
    to [0.01, 20].

    Returns:
        (X, y) float32 tensors of shape (n_samples, seq_len) and
        (n_samples, 1); y is the mean of C(r) over each row of X.
    """
    regimes = ['equilibrium', 'silver', 'golden', 'chaotic', 'oscillating']

    def draw(regime):
        # Raw (unclipped) ratios for one sample of the given regime.
        if regime == 'equilibrium':
            return 1.0 + np.random.normal(0, 0.05, seq_len)
        if regime == 'silver':
            centre = np.random.choice([δ_S, 1/δ_S])
            return centre + np.random.normal(0, 0.2, seq_len)
        if regime == 'golden':
            centre = np.random.choice([φ**2, 1/φ**2])
            return centre + np.random.normal(0, 0.3, seq_len)
        if regime == 'chaotic':
            return np.random.exponential(2, seq_len) + 0.01
        # Remaining regime: cosine pattern around 1.5 plus noise.
        amplitude = np.random.uniform(0.5, 2.0)
        wave = [amplitude * np.cos(k * 3 * np.pi / 4) + 1.5 for k in range(seq_len)]
        return np.array(wave) + np.random.normal(0, 0.1, seq_len)

    features = np.zeros((n_samples, seq_len))
    targets = np.zeros((n_samples, 1))
    for row in range(n_samples):
        picked = np.random.choice(regimes)
        sample = np.clip(draw(picked), 0.01, 20.0)
        features[row] = sample
        targets[row] = np.mean([C(r) for r in sample])

    return (
        torch.tensor(features, dtype=torch.float32),
        torch.tensor(targets, dtype=torch.float32),
    )
def generate_np_prediction_data(n_samples=10000, seq_len=8):
    """
    Build a synthetic next-value regression set.

    Each trajectory starts at a uniform value in [0.1, 5.0] and evolves as
        r ← max(0.01, r + (1 - r) · (1 - C(r)) · decay + N(0, noise))
    with per-trajectory `decay` ∈ [0.8, 1.2] and `noise` ∈ [0.01, 0.2].

    Returns:
        (X, y) float32 tensors: X holds the first `seq_len` values of
        each trajectory, y the value that follows them.
    """
    inputs = np.zeros((n_samples, seq_len))
    nexts = np.zeros((n_samples, 1))

    for row in range(n_samples):
        current = np.random.uniform(0.1, 5.0)
        decay = np.random.uniform(0.8, 1.2)
        noise = np.random.uniform(0.01, 0.2)

        trajectory = [current]
        for _ in range(seq_len):
            # Step toward 1; the step shrinks as coherence approaches 1.
            pull = (1.0 - current) * (1.0 - C(current)) * decay
            current = max(0.01, current + pull + np.random.normal(0, noise))
            trajectory.append(current)

        inputs[row] = trajectory[:-1]
        nexts[row] = trajectory[-1]

    return (
        torch.tensor(inputs, dtype=torch.float32),
        torch.tensor(nexts, dtype=torch.float32),
    )
| # ── Training ───────────────────────────────────────────────────────── | |
def train_model(task, epochs, learning_rate, lambda_coherence):
    """Train a fresh MuNet on synthetic data and report the run as text.

    Args:
        task: "Coherence Prediction" selects generate_coherence_data; any
            other value selects generate_np_prediction_data.
        epochs: Number of passes over the 8000-sample training set (cast to int).
        learning_rate: AdamW learning rate (cast to float).
        lambda_coherence: Weight passed to CoherenceLoss (cast to float).

    Returns:
        (training_log, training_curve): two newline-joined strings.

    Side effects:
        Writes model weights, run config and history to
        "mu_net_trained.pt" in the current working directory.
    """
    epochs = int(epochs)
    lr = float(learning_rate)
    lam = float(lambda_coherence)

    # Generate data (train first, then validation)
    if task == "Coherence Prediction":
        make_data = generate_coherence_data
    else:
        make_data = generate_np_prediction_data
    X_train, y_train = make_data(8000)
    X_val, y_val = make_data(2000)

    # Create model
    model = MuNet(input_dim=8, hidden_dim=64, output_dim=1)
    criterion = CoherenceLoss(lambda_coherence=lam)
    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=0.01)
    # max(1, …) keeps the schedule well-defined even for epochs < 1.
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=max(1, epochs))
    val_criterion = nn.MSELoss()

    history = {
        'epoch': [], 'train_loss': [], 'val_loss': [],
        'coherence': [], 'silver_gate': []
    }
    batch_size = 256
    # Full batches only; the trailing partial batch of each epoch is dropped.
    n_batches = len(X_train) // batch_size

    log_lines = [
        "🧬 μ-Net Training Started",
        f"Task: {task}",
        "Architecture: 8 layers × 64 hidden (μ^k activation)",
        f"Parameters: {sum(p.numel() for p in model.parameters()):,}",
        f"Epochs: {epochs} | LR: {lr} | λ_coherence: {lam}",
        "─" * 50,
    ]

    best_val = float('inf')
    # Stays None when no epoch improves on `inf` (epochs < 1, or every
    # validation loss is NaN); handled explicitly after the loop.
    best_state = None
    # Log roughly 20 times over the run, plus the final epoch.
    log_every = max(1, epochs // 20)

    for epoch in range(epochs):
        model.train()
        epoch_loss = 0.0

        # Shuffle
        perm = torch.randperm(len(X_train))
        X_shuf = X_train[perm]
        y_shuf = y_train[perm]

        for b in range(n_batches):
            start = b * batch_size
            end = start + batch_size
            xb = X_shuf[start:end]
            yb = y_shuf[start:end]

            optimizer.zero_grad()
            pred = model(xb)
            loss = criterion(pred, yb, model)
            loss.backward()
            # Gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            epoch_loss += loss.item()

        scheduler.step()

        # Validation (plain MSE, without the coherence term)
        model.eval()
        with torch.no_grad():
            val_loss = val_criterion(model(X_val), y_val).item()

        train_loss = epoch_loss / n_batches
        model_coherence = model.get_coherence_state()
        gate_val = model.silver_gate.item()

        history['epoch'].append(epoch + 1)
        history['train_loss'].append(train_loss)
        history['val_loss'].append(val_loss)
        history['coherence'].append(model_coherence)
        history['silver_gate'].append(gate_val)

        if val_loss < best_val:
            best_val = val_loss
            best_state = {k: v.clone() for k, v in model.state_dict().items()}

        if (epoch + 1) % log_every == 0 or epoch == epochs - 1:
            log_lines.append(
                f"Epoch {epoch+1:4d} | "
                f"Train: {train_loss:.6f} | Val: {val_loss:.6f} | "
                f"C(model): {model_coherence:.4f} | "
                f"gate: {gate_val:.4f}"
            )

    # Restore the best checkpoint when one exists
    if best_state is not None:
        model.load_state_dict(best_state)
    else:
        log_lines.append(
            "⚠️ No epoch produced a finite, improving validation loss; "
            "keeping the final weights."
        )

    # Final evaluation
    model.eval()
    with torch.no_grad():
        val_pred = model(X_val).numpy()
    val_true = y_val.numpy()
    # Cast to built-in float so the checkpoint config holds plain Python
    # numbers rather than NumPy scalars.
    mae = float(np.mean(np.abs(val_pred - val_true)))
    r2 = float(
        1 - np.sum((val_true - val_pred)**2) / np.sum((val_true - np.mean(val_true))**2)
    )
    final_coherence = model.get_coherence_state()

    log_lines.append("─" * 50)
    log_lines.append("✅ Training complete!")
    log_lines.append(f"Best validation loss: {best_val:.6f}")
    log_lines.append(f"MAE: {mae:.6f}")
    log_lines.append(f"R²: {r2:.6f}")
    log_lines.append(f"Final model coherence: {final_coherence:.4f}")
    log_lines.append(f"Silver gate (learned): {model.silver_gate.item():.6f} (init: {C(δ_S):.6f})")

    # Check if gate stayed near its initial value C(δ_S)
    gate_drift = abs(model.silver_gate.item() - C(δ_S))
    if gate_drift < 0.1:
        log_lines.append(f"→ Silver gate preserved! Drift = {gate_drift:.4f} (< 0.1)")
        log_lines.append(" The network learned that η = 1/√2 is optimal.")
    else:
        log_lines.append(f"→ Silver gate drifted: {gate_drift:.4f}")
        log_lines.append(f" Learned gate: {model.silver_gate.item():.4f} vs η={C(δ_S):.4f}")

    # Phase activations
    log_lines.append("\n**μ-Phase gate values (learned):**")
    for k, layer in enumerate(model.layers):
        act = layer['activation']
        log_lines.append(
            f" k={k}: gate={act.gate.item():.4f} "
            f"(cos={act.cos_k:.3f}, sin={act.sin_k:.3f})"
        )

    # Save model
    save_path = "mu_net_trained.pt"
    torch.save({
        'model_state': model.state_dict(),
        'config': {
            'input_dim': 8, 'hidden_dim': 64, 'output_dim': 1,
            'task': task, 'epochs': epochs, 'lr': lr,
            'best_val_loss': best_val, 'mae': mae, 'r2': r2,
            'final_coherence': final_coherence,
        },
        'history': history,
    }, save_path)
    log_lines.append(f"\n💾 Model saved to {save_path}")

    # Format training curve as text (about 20 evenly spaced rows + the last)
    curve_lines = ["**Training Curve:**\n"]
    curve_lines.append("```")
    curve_lines.append(f"{'Epoch':>6} {'Train':>10} {'Val':>10} {'C(model)':>10} {'Gate':>8}")
    n_points = len(history['epoch'])
    stride = max(1, n_points // 20)
    for i in range(n_points):
        if i % stride == 0 or i == n_points - 1:
            curve_lines.append(
                f"{history['epoch'][i]:6d} "
                f"{history['train_loss'][i]:10.6f} "
                f"{history['val_loss'][i]:10.6f} "
                f"{history['coherence'][i]:10.4f} "
                f"{history['silver_gate'][i]:8.4f}"
            )
    curve_lines.append("```")

    training_log = "\n".join(log_lines)
    training_curve = "\n".join(curve_lines)
    return training_log, training_curve
| # ── Inference ──────────────────────────────────────────────────────── | |
def run_inference(input_text):
    """Run the saved μ-Net on eight comma-separated numbers.

    Args:
        input_text: Text such as "1.0, 1.2, 0.9, 1.5, 2.0, 1.8, 1.1, 0.95".

    Returns:
        A markdown-formatted report string, or a short message when no
        checkpoint exists or the input is not eight finite numbers.

    The report depends on the task stored in the checkpoint config: for
    "Coherence Prediction" the prediction is compared with the true mean
    C(r) of the input; for any other task the model output is a
    next-value forecast, so the mean C(r) is shown for reference only and
    no error is computed against it.
    """
    save_path = "mu_net_trained.pt"
    if not os.path.exists(save_path):
        return "No trained model found. Train first!"

    try:
        values = [float(x.strip()) for x in input_text.strip().split(",")]
    except ValueError:
        return "Enter 8 comma-separated numbers (e.g.: 1.0, 1.2, 0.9, 1.5, 2.0, 1.8, 1.1, 0.95)"
    if len(values) != 8:
        return f"Need exactly 8 values, got {len(values)}"
    # float() accepts "nan" and "inf"; these would propagate through the
    # network and the coherence table as NaN.
    if not all(np.isfinite(v) for v in values):
        return "All 8 values must be finite numbers (no NaN or inf)"

    # Load model
    # NOTE(security): weights_only=False unpickles arbitrary objects. It is
    # kept because checkpoints written by this app may contain non-tensor
    # objects in 'config'; only load files this app produced itself.
    checkpoint = torch.load(save_path, weights_only=False)
    model = MuNet(input_dim=8, hidden_dim=64, output_dim=1)
    model.load_state_dict(checkpoint['model_state'])
    model.eval()

    x = torch.tensor([values], dtype=torch.float32)
    with torch.no_grad():
        pred = model(x).item()

    # Closed-form coherence of each input value
    true_coherences = [C(v) for v in values]
    true_mean = np.mean(true_coherences)

    config = checkpoint['config']
    # Checkpoints without a recorded task are treated as coherence models.
    task = config.get('task', "Coherence Prediction")

    lines = [f"**Input:** {values}", ""]
    if task == "Coherence Prediction":
        lines += [
            f"**μ-Net prediction:** {pred:.6f}",
            f"**True mean C(r):** {true_mean:.6f}",
            f"**Error:** {abs(pred - true_mean):.6f}",
        ]
    else:
        lines += [
            f"**μ-Net prediction (next value, task: {task}):** {pred:.6f}",
            f"**Mean C(r) of input (reference only):** {true_mean:.6f}",
        ]
    lines += ["", "**Per-value coherence:**"]

    silver_cut = C(δ_S)
    golden_cut = C(φ**2)
    for v, c in zip(values, true_coherences):
        if c > 0.98:
            zone = "⚖️"
        elif c > silver_cut:
            zone = "🥈"
        elif c > golden_cut:
            zone = "🥇"
        else:
            zone = "🌀"
        lines.append(f" {zone} r={v:.4f} → C(r)={c:.6f}")

    lines.append("")
    lines.append(f"**Model info:** R²={config['r2']:.4f}, MAE={config['mae']:.6f}")
    lines.append(f"**Model coherence:** {model.get_coherence_state():.4f}")
    return "\n".join(lines)
| # ── Push to Hub ────────────────────────────────────────────────────── | |
def push_to_hub(repo_name):
    """Upload the saved checkpoint and a generated model card to the Hub.

    Args:
        repo_name: Either "owner/name" or a bare name, which is placed
            under the "COINjecture" namespace.

    Returns:
        A status string for the UI; failures are reported in the string
        rather than raised.

    Reads the access token from the HF_TOKEN environment variable when
    it is set.
    """
    save_path = "mu_net_trained.pt"
    if not os.path.exists(save_path):
        return "No trained model found. Train first!"

    # A blank name would otherwise become the invalid id "COINjecture/".
    repo_name = (repo_name or "").strip()
    if not repo_name:
        return "❌ Push failed: repository ID is empty"

    try:
        from huggingface_hub import upload_file, create_repo, login

        # Auth with secret
        hf_token = os.environ.get("HF_TOKEN", "")
        if hf_token:
            login(token=hf_token)

        repo_id = repo_name if "/" in repo_name else f"COINjecture/{repo_name}"

        # Read the checkpoint before any network call so that an
        # unreadable file fails before a repo is created or populated.
        # NOTE(security): weights_only=False unpickles arbitrary objects;
        # only use on checkpoints written by this app.
        checkpoint = torch.load(save_path, weights_only=False)
        config = checkpoint['config']
        # Count parameters of the weights actually being uploaded.
        n_params = sum(t.numel() for t in checkpoint['model_state'].values())

        # Create model repo
        create_repo(repo_id, repo_type="model", exist_ok=True, token=hf_token or None)

        # Upload model
        upload_file(
            path_or_fileobj=save_path,
            path_in_repo="mu_net_trained.pt",
            repo_id=repo_id,
            repo_type="model",
            token=hf_token or None,
        )

        # Create model card
        card = f"""---
tags:
- eigenverse
- quantum
- coherence
- mu-net
license: mit
---
# μ-Net — Eigenverse-Grounded Neural Network
8-layer network with μ^k phase-modulated activations, trained on coherence data.
## Architecture
- **Layers:** 8 (μ⁸ = 1, orbit closure)
- **Hidden dim:** 64
- **Activation:** MuActivation (135° phase rotation per layer)
- **Loss:** MSE + coherence regularization
- **Parameters:** ~{n_params:,}
## Results
- **R²:** {config['r2']:.4f}
- **MAE:** {config['mae']:.6f}
- **Best val loss:** {config['best_val_loss']:.6f}
- **Model coherence:** {config['final_coherence']:.4f}
## Source
- [Eigenverse](https://github.com/beanapologist/Eigenverse) — 552 Lean theorems, 0 sorry
- [COINjecture](https://huggingface.co/COINjecture)
"""
        upload_file(
            path_or_fileobj=card.encode(),
            path_in_repo="README.md",
            repo_id=repo_id,
            repo_type="model",
            token=hf_token or None,
        )
        return f"✅ Model pushed to [{repo_id}](https://huggingface.co/{repo_id})"
    except Exception as e:
        # UI boundary: surface the failure as text instead of raising.
        return f"❌ Push failed: {e}"
| # ── UI ─────────────────────────────────────────────────────────────── | |
# Markdown rendered at the top of the page via gr.Markdown(HEADER).
HEADER = """
# 🧬 μ-Net Training Lab
**Train neural networks grounded in the Eigenverse.**
The architecture IS the math:
- **8 layers** → μ⁸ = 1 (orbit closure)
- **μ^k activations** → 135° phase rotation per layer
- **Coherence loss** → C(r) = 2r/(1+r²) regularization
- **Silver gate** → skip connections weighted by η = 1/√2
552 Lean theorems → network architecture → trained weights.
[Eigenverse](https://github.com/beanapologist/Eigenverse) · [COINjecture](https://huggingface.co/COINjecture)
"""
# Build the Gradio page: a header, four tabs (Train, Inference, Push to
# Hub, Architecture) and a footer. Components appear in creation order,
# so statement order here is the layout.
with gr.Blocks() as demo:
    gr.Markdown(HEADER)
    with gr.Tab("🏋️ Train"):
        gr.Markdown("Train the μ-Net live on this hardware.")
        task = gr.Radio(
            ["Coherence Prediction", "Sequence Prediction"],
            value="Coherence Prediction",
            label="Task"
        )
        epochs = gr.Slider(50, 500, value=100, step=10, label="Epochs")
        lr = gr.Number(value=0.001, label="Learning Rate")
        lambda_c = gr.Number(value=0.01, label="λ coherence")
        train_btn = gr.Button("🚀 Train μ-Net", variant="primary")
        train_log = gr.Textbox(label="Training Log", lines=20, interactive=False)
        train_curve = gr.Textbox(label="Training Curve", lines=15, interactive=False)

        def safe_train(task, epochs, lr, lam):
            # Wrapper around train_model: any exception is returned as
            # text (message + traceback) in the log box, with an empty
            # curve box, instead of propagating to Gradio.
            try:
                return train_model(task, epochs, lr, lam)
            except Exception as e:
                import traceback
                return f"ERROR: {e}\n\n{traceback.format_exc()}", ""

        train_btn.click(
            safe_train,
            inputs=[task, epochs, lr, lambda_c],
            outputs=[train_log, train_curve]
        )
    with gr.Tab("🔮 Inference"):
        gr.Markdown("Run the trained μ-Net on new data.")
        input_box = gr.Textbox(
            value="1.0, 1.2, 0.9, 1.5, 2.0, 1.8, 1.1, 0.95",
            label="8 ratio values (comma-separated)"
        )
        infer_btn = gr.Button("Predict", variant="primary")
        infer_output = gr.Textbox(label="Result", lines=15, interactive=False)
        # run_inference reads the checkpoint written by train_model.
        infer_btn.click(run_inference, inputs=input_box, outputs=infer_output)
    with gr.Tab("📤 Push to Hub"):
        gr.Markdown("Save the trained model to HuggingFace Hub.")
        repo_input = gr.Textbox(
            value="COINjecture/mu-net",
            label="Repository ID"
        )
        push_btn = gr.Button("Push Model", variant="primary")
        push_output = gr.Textbox(label="Status", lines=3, interactive=False)
        push_btn.click(push_to_hub, inputs=repo_input, outputs=push_output)
    with gr.Tab("🧠 Architecture"):
        # Static documentation tab; the text below is display-only.
        gr.Markdown("""
## μ-Net Architecture
```
Input (8 ratios)
↓
Linear(8 → 64)
↓
┌─────────────────────────────────────┐
│ Layer 0: Linear → μ⁰-Act → LN │ k=0: cos(0)=1, sin(0)=0 (pure real)
│ Layer 1: Linear → μ¹-Act → LN │ k=1: cos(135°)=−η, sin(135°)=η
│ Layer 2: Linear → μ²-Act → LN │ k=2: cos(270°)=0, sin(270°)=−1
│ Layer 3: Linear → μ³-Act → LN │ k=3: cos(405°)=η, sin(405°)=η
│ Layer 4: Linear → μ⁴-Act → LN │ k=4: cos(540°)=−1, sin(540°)=0
│ Layer 5: Linear → μ⁵-Act → LN │ k=5: cos(675°)=η, sin(675°)=−η
│ Layer 6: Linear → μ⁶-Act → LN │ k=6: cos(810°)=0, sin(810°)=1
│ Layer 7: Linear → μ⁷-Act → LN │ k=7: cos(945°)=−η, sin(945°)=−η
│ │
│ Each layer: h = h + η·f(h) │ Silver-gated residual
│ μ⁸ = 1 → orbit closes │
└─────────────────────────────────────┘
↓
+ skip connection (8-cycle closure)
↓
Linear(64 → 1)
↓
Output (predicted coherence)
```
### Key Design Choices
**Why 8 layers?** μ⁸ = 1. The orbit closes. 8 × 135° = 3 × 360°.
Three full turns in 8 steps, gear ratio coprime (gcd(3,8)=1).
**Why μ^k activations?** Each layer applies a different phase of the
eigenvalue rotation. Layer 0 is pure real (dissipation). Layer 2 is
pure imaginary (oscillation). The mix changes every layer, covering
all 8 distinct phases.
**Why silver gate?** The skip connections are weighted by a learnable
parameter initialized at C(δ_S) = η = 1/√2. During training, if the
network discovers that η is optimal, the gate stays near its init.
This is empirically testable: does the math hold?
**Why coherence loss?** Standard L2 regularization penalizes weight
magnitude. Coherence regularization penalizes *deviation from balance*.
Weights that drift from their initialized ratio lose coherence.
""")
    # Footer. NOTE(review): nesting was reconstructed from a flattened
    # source; assumed to sit below all tabs — confirm.
    gr.Markdown("""
---
*552 Lean theorems → architecture → trained weights. The math builds the network.*
""")
# Start the app only when this file is executed as a script.
# NOTE(review): ssr_mode=False presumably disables Gradio's server-side
# rendering — confirm against the Gradio version this Space pins.
if __name__ == "__main__":
    demo.launch(ssr_mode=False)