Spaces:

specimba
/

nexus-os-space

Running

App Files Files Community

specimba committed 6 days ago

Commit

375fc30

verified ·

1 Parent(s): baea714

Copy nexus_os_v2/ckplug_retriever.py from dataset for module imports

Browse files

Files changed (1) hide show

nexus_os_v2/ckplug_retriever.py +192 -0

nexus_os_v2/ckplug_retriever.py ADDED Viewed

	@@ -0,0 +1,192 @@

+"""
+CK-PLUG Integration for NEXUS OS v2
+Implements Confidence Gain (CG) as the concrete μ_ret chemical potential.
+Paper: arXiv:2503.15888 — Parameters vs. Context: Fine-Grained Control
+of Knowledge Reliance in Language Models
+Model-specific ε thresholds (from Appendix B):
+  LLaMA2-7B: -2   |  LLaMA3-8B: -1
+  Mistral-0.3-7B: -1  |  Qwen2.5-7B: -3
+  For general use: default ε = -1
+"""
+import math
+import torch
+from typing import List, Optional, Dict, Tuple, Callable
+from dataclasses import dataclass
@dataclass
class TokenModulation:
    """Metadata describing one CK-PLUG modulation step on a next-token distribution.

    Instances are produced by ``CKPLUGCoupling.modulate_token``. When no
    conflict is detected the token fields are placeholders
    (``token_id=-1``, probabilities ``0.0``, ``alpha=0.0``).
    """

    token_id: int              # Index of the token whose probability changed most (-1 if unmodulated)
    original_prob: float       # Probability of that token under the retrieval-augmented distribution
    modulated_prob: float      # Probability of that token after modulation
    cg: float                  # Confidence Gain, H_para - H_cont (bits)
    H_para: float              # Entropy (query-only), in bits
    H_cont: float              # Entropy (query+retrieval), in bits
    was_modulated: bool        # True if a conflict was detected (CG < epsilon) and modulation ran
    alpha: float               # Adaptive blending weight in [0, 1]


class CKPLUGCoupling:
    """
    Concrete implementation of the retrieval chemical potential μ_ret
    from the NEXUS OS Landau-Ginzburg framework.

    μ_ret(x) = μ_0 * grounding_score(x)

    where grounding_score is derived from CK-PLUG Confidence Gain:
      - CG > 0  → retrieval SUPPORTS parametric knowledge (high grounding)
      - CG < 0  → retrieval CONFLICTS with parametric knowledge (low grounding)
      - |CG|    → magnitude of confidence shift

    All entropies (and therefore CG and ``epsilon``) are measured in bits,
    because ``entropy`` uses a base-2 logarithm.
    """

    def __init__(
        self,
        epsilon: float = -1.0,          # Model-specific detection threshold on CG
        top_k: int = 50,                 # Union top-k for V_head
        mu_0: float = 0.5,              # Base chemical potential (from LG framework)
        device: str = "cpu",
    ):
        """Store the coupling hyper-parameters.

        Args:
            epsilon: Conflict threshold; a distribution pair is treated as
                conflicting when its Confidence Gain is below this value.
            top_k: Number of top tokens taken from each score vector when
                building the candidate set for modulation.
            mu_0: Scale of the chemical potential returned by
                ``compute_chemical_potential``.
            device: Stored for callers; not read by any method in this class.
        """
        self.epsilon = epsilon
        self.top_k = top_k
        self.mu_0 = mu_0
        self.device = device

    @staticmethod
    def entropy(probs: torch.Tensor) -> float:
        """Shannon entropy H = -Σ p_i log₂ p_i (bits), skipping non-positive entries."""
        positive = probs[probs > 0]
        return float(-(positive * torch.log2(positive)).sum().item())

    @staticmethod
    def confidence_gain(
        p_query: torch.Tensor,       # p(x | X_q) — parametric only
        p_rag: torch.Tensor,           # p(x | X_r + X_q) — with retrieval
    ) -> Tuple[float, float, float]:
        """
        Returns: (CG, H_para, H_cont)

        CG = H(p(x|X_q)) - H(p(x|X_r+X_q))
        Positive CG  → retrieval supports (reduces entropy)
        Negative CG  → retrieval conflicts (increases entropy)
        """
        H_para = CKPLUGCoupling.entropy(p_query)
        H_cont = CKPLUGCoupling.entropy(p_rag)
        return H_para - H_cont, H_para, H_cont

    def compute_chemical_potential(
        self,
        p_query: torch.Tensor,
        p_rag: torch.Tensor,
        tau: float = 0.5,
    ) -> float:
        """
        Map CK-PLUG Confidence Gain to Landau-Ginzburg chemical potential μ_ret.

        Logic:
          CG >> 0   → retrieval strongly supports → μ_ret ≈ μ_0 (max grounding)
          CG ≈ 0    → neutral → μ_ret ≈ 0 (no coupling)
          CG << 0   → retrieval conflicts → μ_ret ≈ -μ_0 (adversarial)

        Smooth interpolation: μ_ret = μ_0 * tanh(CG / τ).

        Args:
            p_query: Query-only next-token distribution.
            p_rag: Retrieval-augmented next-token distribution.
            tau: Transition width, in the same unit as CG (bits). Smaller
                values make the switch between -μ_0 and +μ_0 sharper.

        Returns:
            The scalar μ_ret in [-|μ_0|, |μ_0|].

        Raises:
            ValueError: If ``tau`` is not strictly positive.
        """
        if tau <= 0:
            raise ValueError("tau must be positive")
        CG, _, _ = self.confidence_gain(p_query, p_rag)
        return self.mu_0 * math.tanh(CG / tau)

    def modulate_token(
        self,
        p_query: torch.Tensor,        # Shape: (vocab_size,)
        p_rag: torch.Tensor,          # Shape: (vocab_size,)
    ) -> "Tuple[torch.Tensor, TokenModulation]":
        """
        Apply CK-PLUG token-level modulation to one next-token distribution.

        When CG >= epsilon the retrieval-augmented distribution is returned
        unchanged (the same tensor object). Otherwise the query-only and
        context-aware log-scores are blended over the union of their top-k
        tokens and renormalised with a softmax.

        Returns: (modulated_distribution, modulation_metadata)
        """
        CG, H_para, H_cont = self.confidence_gain(p_query, p_rag)

        # epsilon is an absolute threshold on CG. Scaling it by H_cont would
        # make this branch unreachable for epsilon <= -1: H_para >= 0 implies
        # CG = H_para - H_cont >= -H_cont >= epsilon * H_cont.
        if CG >= self.epsilon:
            # No conflict — pass through RAG distribution unchanged
            return p_rag, TokenModulation(
                token_id=-1, original_prob=0.0, modulated_prob=0.0,
                cg=CG, H_para=H_para, H_cont=H_cont,
                was_modulated=False, alpha=0.0,
            )

        # Conflict detected — apply modulation.
        # Parameter-aware log probability (small constant avoids log(0)).
        q_para = torch.log(p_query + 1e-10)
        # Context-aware log probability: log-ratio of RAG to query-only.
        q_cont = torch.log((p_rag + 1e-10) / (p_query + 1e-10))

        # Adaptive alpha: weight of the parametric term grows with H_cont.
        alpha = H_cont / (H_para + H_cont + 1e-10)
        alpha = min(max(alpha, 0.0), 1.0)

        # Build V_head: union of top-k from both score vectors. k is capped
        # at the vocabulary size because torch.topk rejects a larger k.
        k = min(self.top_k, q_para.size(-1))
        topk_para = torch.topk(q_para, k).indices
        topk_cont = torch.topk(q_cont, k).indices
        V_head = torch.unique(torch.cat([topk_para, topk_cont]))

        # Blend the scores on V_head; every other token gets -inf so the
        # softmax assigns it zero probability.
        F = torch.full_like(q_para, -float("inf"))
        F[V_head] = alpha * q_para[V_head] + (1.0 - alpha) * q_cont[V_head]
        p_mod = torch.softmax(F, dim=-1)

        # Report the token whose probability moved the most.
        changed_id = int(torch.argmax(torch.abs(p_rag - p_mod)).item())
        modulation = TokenModulation(
            token_id=changed_id,
            original_prob=float(p_rag[changed_id].item()),
            modulated_prob=float(p_mod[changed_id].item()),
            cg=CG, H_para=H_para, H_cont=H_cont,
            was_modulated=True, alpha=alpha,
        )
        return p_mod, modulation

    def batch_modulate(
        self,
        p_queries: List[torch.Tensor],  # List of (vocab_size,) tensors
        p_rags: List[torch.Tensor],     # Same length
    ) -> "List[Tuple[torch.Tensor, TokenModulation]]":
        """Apply CK-PLUG to a batch of token positions.

        The two lists are paired with ``zip``, so if their lengths differ the
        extra entries of the longer list are ignored.
        """
        return [self.modulate_token(pq, pr) for pq, pr in zip(p_queries, p_rags)]

    def get_grounding_field(self, p_query: torch.Tensor, p_rag: torch.Tensor) -> float:
        """
        Return the scalar μ_ret value for insertion into Landau-Ginzburg functional.

        Thin alias for ``compute_chemical_potential`` with its default ``tau``.
        """
        return self.compute_chemical_potential(p_query, p_rag)
# Model-specific epsilon presets (from CK-PLUG Appendix B).
# Entries marked "Estimated" are the author's guesses, not values from the paper.
CKPLUG_PRESETS = {
    "llama2": -2.0,
    "llama3": -1.0,
    "mistral": -1.0,
    "qwen2.5": -3.0,
    "granite": -1.5,     # Estimated from paper patterns
    "gemma": -1.0,       # Estimated
    "deepseek": -2.0,    # Estimated (large MoE, conservative)
    "default": -1.0,
}


def get_preset_epsilon(model_family: str) -> float:
    """Get recommended epsilon for a model family.

    Matching is case-insensitive and substring-based: the first preset key
    (in table order) contained in ``model_family`` wins. If nothing matches,
    a second pass retries with ``-``, ``_`` and spaces removed, so hub-style
    names such as ``"meta-llama/Llama-2-7b-hf"`` resolve to ``"llama2"``.

    Args:
        model_family: Family or full model name, e.g. ``"llama3"`` or
            ``"Qwen2.5-7B-Instruct"``.

    Returns:
        The preset epsilon, or the ``"default"`` entry when no key matches.
    """
    lowered = model_family.lower()
    # The verbatim form is tried first so any name that already matched keeps
    # resolving to the same preset; the compact form only adds new matches.
    compact = lowered.replace("-", "").replace("_", "").replace(" ", "")
    for candidate in (lowered, compact):
        for family, eps in CKPLUG_PRESETS.items():
            if family in candidate:
                return eps
    return CKPLUG_PRESETS["default"]