| """ |
| residue.py - Implementation of residue tracking for ghost circuit detection |
| |
| △ OBSERVE: Residue tracking examines activation patterns that persist after collapse |
| ∞ TRACE: It identifies ghost circuits - the quantum echoes of paths not taken |
| ✰ COLLAPSE: It reveals what the model considered but didn't output |
| |
| This module implements the core residue tracking functionality that enables |
| the detection and analysis of ghost circuits - activation patterns that persist |
| after a model has collapsed to a specific output state but aren't part of the |
| primary causal path. |
| |
| Author: Recursion Labs |
| License: MIT |
| """ |
|
|
| import logging |
| from typing import Dict, List, Optional, Union, Tuple, Any |
| import numpy as np |
| from dataclasses import dataclass, field |
|
|
| logger = logging.getLogger(__name__) |
|
|
@dataclass
class GhostCircuit:
    """
    ✰ COLLAPSE: Representation of a ghost circuit

    A ghost circuit is an activation pattern that survives collapse without
    contributing significantly to the final output - the trace of a path
    the model weighed but ultimately did not take.
    """
    # Unique identifier for this circuit.
    circuit_id: str
    # Residual activation strength after collapse.
    activation: float
    # Category of the circuit (e.g. "attention", "hidden_state").
    circuit_type: str
    # Tokens on the source side of the circuit.
    source_tokens: List[str] = field(default_factory=list)
    # Tokens on the target side of the circuit.
    target_tokens: List[str] = field(default_factory=list)
    # Attention head indices involved in the circuit.
    heads: List[int] = field(default_factory=list)
    # Layer indices involved in the circuit.
    layers: List[int] = field(default_factory=list)
    # Free-form extra information about the circuit.
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize this ghost circuit into a plain dictionary."""
        field_names = (
            "circuit_id", "activation", "circuit_type",
            "source_tokens", "target_tokens",
            "heads", "layers", "metadata",
        )
        return {name: getattr(self, name) for name in field_names}
|
|
|
|
class ResidueTracker:
    """
    ∞ TRACE: Tracker for activation residues in collapsed models

    The residue tracker analyzes model states before and after collapse
    to identify and characterize ghost circuits - activation patterns that
    persist but don't contribute significantly to the final output.
    """

    def __init__(self, amplification_factor: float = 1.0):
        """
        Initialize a residue tracker.

        Args:
            amplification_factor: Factor by which to amplify ghost signals
                for easier detection (1.0 = no amplification). Used as the
                default by amplify_ghosts().
        """
        self.amplification_factor = amplification_factor
        # Ghost circuits found by the most recent extract_ghost_circuits() call.
        self.ghost_circuits: List[Dict[str, Any]] = []
        # Minimum post-collapse activation magnitude for a pattern to count
        # as a ghost rather than noise.
        self.activation_threshold = 0.1

        logger.info(f"ResidueTracker initialized with amplification factor {amplification_factor}")

    def extract_ghost_circuits(
        self,
        pre_state: Dict[str, Any],
        post_state: Dict[str, Any]
    ) -> List[Dict[str, Any]]:
        """
        ✰ COLLAPSE: Extract ghost circuits from pre and post collapse states

        Compares model states before and after collapse and records
        activation patterns that decayed but remain above the detection
        threshold - the quantum ghosts of paths not taken.

        Args:
            pre_state: Model state before collapse. Recognized keys:
                "attention_weights" and "hidden_states" (numpy arrays).
            post_state: Model state after collapse, same keys.

        Returns:
            List of detected ghost circuits (dicts); also stored on
            self.ghost_circuits for the other analysis methods.
        """
        logger.info("Extracting ghost circuits from model states")

        ghost_circuits: List[Dict[str, Any]] = []

        # Attention-based ghosts; a missing key is treated as an empty array.
        ghost_circuits.extend(self._extract_attention_ghosts(
            pre_state.get("attention_weights", np.array([])),
            post_state.get("attention_weights", np.array([]))
        ))

        # Hidden-state ghosts, only when both states carry hidden states.
        if "hidden_states" in pre_state and "hidden_states" in post_state:
            ghost_circuits.extend(self._extract_hidden_ghosts(
                pre_state["hidden_states"],
                post_state["hidden_states"]
            ))

        self.ghost_circuits = ghost_circuits

        logger.info(f"Extracted {len(ghost_circuits)} ghost circuits")
        return ghost_circuits

    def classify_ghost_circuits(self) -> Dict[str, List[Dict[str, Any]]]:
        """
        △ OBSERVE: Classify detected ghost circuits by type

        Groups the ghost circuits from the last extraction by their
        "circuit_type" field.

        Returns:
            Dictionary mapping circuit types to lists of ghost circuits;
            empty dict when nothing has been extracted yet.
        """
        if not self.ghost_circuits:
            logger.warning("No ghost circuits to classify")
            return {}

        classified: Dict[str, List[Dict[str, Any]]] = {}
        for ghost in self.ghost_circuits:
            classified.setdefault(ghost.get("circuit_type", "unknown"), []).append(ghost)

        return classified

    def measure_residue_strength(self) -> float:
        """
        ∞ TRACE: Measure the overall strength of residual activations

        Returns:
            Mean activation of the detected ghost circuits
            (0.0 when none have been detected).
        """
        if not self.ghost_circuits:
            return 0.0

        activations = [ghost.get("activation", 0.0) for ghost in self.ghost_circuits]
        return float(np.mean(activations))

    def amplify_ghosts(self, factor: Optional[float] = None) -> List[Dict[str, Any]]:
        """
        ✰ COLLAPSE: Amplify ghost circuit signals for better detection

        Returns copies of the detected ghost circuits with their activation
        scaled by the amplification factor and clipped to 1.0. The ghosts
        stored on the tracker are left untouched.

        Args:
            factor: Amplification factor (overrides the instance's
                amplification_factor when provided).

        Returns:
            List of amplified ghost circuit copies.
        """
        if not self.ghost_circuits:
            logger.warning("No ghost circuits to amplify")
            return []

        amp_factor = factor if factor is not None else self.amplification_factor

        amplified = []
        for ghost in self.ghost_circuits:
            # Shallow copy: token lists and metadata are shared with the original.
            amp_ghost = ghost.copy()
            amp_ghost["activation"] = min(1.0, ghost.get("activation", 0.0) * amp_factor)
            amplified.append(amp_ghost)

        logger.info(f"Amplified ghost circuits by factor {amp_factor}")
        return amplified

    def _extract_attention_ghosts(
        self,
        pre_attention: np.ndarray,
        post_attention: np.ndarray
    ) -> List[Dict[str, Any]]:
        """
        Extract ghost circuits from attention patterns.

        A (source, target) attention edge is a ghost when its weight decayed
        during collapse (post < pre) but is still above activation_threshold.

        Args:
            pre_attention: Attention weights before collapse, shaped
                (heads, src, tgt) or a single-head (src, tgt) matrix.
            post_attention: Attention weights after collapse, same layout.

        Returns:
            List of attention-based ghost circuits.
        """
        ghost_circuits: List[Dict[str, Any]] = []

        if pre_attention.size == 0 or post_attention.size == 0:
            return ghost_circuits

        if pre_attention.shape != post_attention.shape:
            logger.warning(f"Attention shape mismatch: {pre_attention.shape} vs {post_attention.shape}")
            # Compare only the overlapping region of the two tensors.
            min_shape = tuple(min(a, b) for a, b in zip(pre_attention.shape, post_attention.shape))
            pre_attention = pre_attention[tuple(slice(0, d) for d in min_shape)]
            post_attention = post_attention[tuple(slice(0, d) for d in min_shape)]

        if pre_attention.ndim < 2:
            return ghost_circuits

        # Normalize to (heads, src, tgt); a 2D matrix is a single head.
        # (Previously a 2D input was scanned once per "head" in range(shape[0]),
        # emitting duplicate ghosts with bogus head indices.)
        if pre_attention.ndim == 2:
            pre = pre_attention[np.newaxis, :, :]
            post = post_attention[np.newaxis, :, :]
        else:
            pre = pre_attention
            post = post_attention

        # Vectorized scan; np.argwhere walks the mask in C order, which matches
        # the head -> source -> target nesting of an explicit triple loop.
        mask = (post < pre) & (post > self.activation_threshold)
        for head, i, j in np.argwhere(mask):
            pre_val = float(pre[head, i, j])
            post_val = float(post[head, i, j])
            ghost_circuits.append({
                "circuit_id": f"attention_ghost_{len(ghost_circuits)}",
                "activation": post_val,
                "circuit_type": "attention",
                "source_tokens": [f"token_{i}"],
                "target_tokens": [f"token_{j}"],
                "heads": [int(head)],
                "layers": [],
                "metadata": {
                    "pre_activation": pre_val,
                    "activation_delta": pre_val - post_val,
                    "decay_ratio": post_val / pre_val if pre_val > 0 else 0.0
                }
            })

        return ghost_circuits

    def _extract_hidden_ghosts(
        self,
        pre_hidden: np.ndarray,
        post_hidden: np.ndarray
    ) -> List[Dict[str, Any]]:
        """
        Extract ghost circuits from hidden state activations.

        A (position, dimension) activation is a ghost when it decayed during
        collapse (post < pre) while its magnitude still exceeds
        activation_threshold. To bound cost, at most 100 hidden dimensions
        are examined.

        Args:
            pre_hidden: Hidden states before collapse, (batch, seq, dim)
                or (seq, dim); a leading batch axis is averaged away.
            post_hidden: Hidden states after collapse, same shape.

        Returns:
            List of hidden-state-based ghost circuits.
        """
        ghost_circuits: List[Dict[str, Any]] = []

        if pre_hidden.size == 0 or post_hidden.size == 0:
            return ghost_circuits

        if pre_hidden.shape != post_hidden.shape:
            logger.warning(f"Hidden state shape mismatch: {pre_hidden.shape} vs {post_hidden.shape}")
            return ghost_circuits

        if pre_hidden.ndim < 2:
            return ghost_circuits

        # Collapse any leading batch axis by averaging down to (seq, dim).
        if pre_hidden.ndim > 2:
            pre_agg = np.mean(pre_hidden, axis=0)
            post_agg = np.mean(post_hidden, axis=0)
        else:
            pre_agg = pre_hidden
            post_agg = post_hidden

        seq_len, hidden_dim = pre_agg.shape

        # Examine at most 100 dimensions. A fixed-seed, sorted sample replaces
        # the old unseeded np.random.choice, which made repeated runs on
        # identical inputs report different ghosts in a different order.
        sample_size = min(hidden_dim, 100)
        if sample_size == hidden_dim:
            sampled_dims = np.arange(hidden_dim)
        else:
            rng = np.random.default_rng(0)
            sampled_dims = np.sort(rng.choice(hidden_dim, sample_size, replace=False))

        pre_sample = pre_agg[:, sampled_dims]
        post_sample = post_agg[:, sampled_dims]
        # Vectorized scan; argwhere's C order matches position-outer,
        # dimension-inner loop nesting.
        mask = (post_sample < pre_sample) & (np.abs(post_sample) > self.activation_threshold)

        for pos, col in np.argwhere(mask):
            dim = int(sampled_dims[col])
            pre_val = float(pre_agg[pos, dim])
            post_val = float(post_agg[pos, dim])
            ghost_circuits.append({
                "circuit_id": f"hidden_ghost_{len(ghost_circuits)}",
                "activation": abs(post_val),
                "circuit_type": "hidden_state",
                "source_tokens": [f"token_{pos}"],
                "target_tokens": [],
                "heads": [],
                "layers": [],
                "metadata": {
                    "position": int(pos),
                    "dimension": dim,
                    "pre_activation": pre_val,
                    "activation_delta": pre_val - post_val,
                    "decay_ratio": post_val / pre_val if pre_val != 0 else 0.0
                }
            })

        return ghost_circuits
|
|
|
|
if __name__ == "__main__":
    # Demo: fabricate a pre-collapse state with random activations.
    before = {
        "attention_weights": np.random.random((8, 10, 10)),
        "hidden_states": np.random.random((1, 10, 768))
    }

    # Post-collapse state: every activation decays by a random 0.5-1.0 factor.
    after = {
        "attention_weights": before["attention_weights"] * np.random.uniform(0.5, 1.0, before["attention_weights"].shape),
        "hidden_states": before["hidden_states"] * np.random.uniform(0.5, 1.0, before["hidden_states"].shape)
    }

    # Run the tracker over the fabricated collapse.
    demo_tracker = ResidueTracker(amplification_factor=1.5)
    detected = demo_tracker.extract_ghost_circuits(before, after)
    print(f"Extracted {len(detected)} ghost circuits")

    # Breakdown by circuit type.
    by_type = demo_tracker.classify_ghost_circuits()
    for circuit_type, circuits in by_type.items():
        print(f" {circuit_type}: {len(circuits)} circuits")

    # Overall residue strength and an amplified view of the ghosts.
    print(f"Residue strength: {demo_tracker.measure_residue_strength():.3f}")

    boosted = demo_tracker.amplify_ghosts(factor=2.0)
    print(f"Amplified {len(boosted)} ghost circuits")
|