Spaces:

karlexmarin
/

taf-agent

Running

karlexmarin Claude Opus 4.7 (1M context) committed 23 days ago

Commit

5d885d7

1 Parent(s): 959e23c

v0.4: add 4 diagnostic functions from session 31

- architectural_concentration_predict: γ_text = γ_Padé - 0.012·n_kv (R²=0.30 cross-panel)
- padé_deviation_index (PDI): d_horizon_obs/T_eval ratio with traffic light
- precision_shift_predict_4bit: R²(bf16) direction rule for 4-bit shifts
- critical_exponents_bundle: ν_c, β_c, η_c=γ-1 (CORRECTED), α_C, γ_susc
+ AM-GM bound γ_susc ≥ 2√2 at γ=1-1/√2

i18n EN/ES/FR/ZH for all 4 new diagnostic labels and descriptions.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (2) hide show

js/i18n.js +53 -0
python/taf_browser.py +158 -0

js/i18n.js CHANGED Viewed

@@ -217,9 +217,34 @@ export const TRANSLATIONS = {
     "help.source.body":         "Source code: <a href=\"https://github.com/karlesmarin/tafagent\" target=\"_blank\">github.com/karlesmarin/tafagent</a><br>Paper: <em>Marin 2026 — Predicting How Transformers Attend</em> (<a href=\"https://zenodo.org/records/19826343\" target=\"_blank\">Zenodo</a>; arXiv forthcoming)<br>Dataset: <a href=\"https://huggingface.co/datasets/karlexmarin/taf-attention-decay\" target=\"_blank\">taf-attention-decay</a> — 58 γ-measurements across 32 models (CC-BY-4.0)",
     "footer.text":             "© 2026 Carles Marin · Apache-2.0 · independent research · the tool that closes the loop of the paper.",
   },
   es: {
     "hero.title":     "🔬 TAF Agent",
     "hero.tagline":   "Prueba <strong>CUALQUIER</strong> LLM transformer antes de gastar GPU/€.",
     "hero.subtitle":  "Todo el cómputo corre localmente en tu navegador. Gratis. Sin límites. Auditable.",
@@ -428,7 +453,21 @@ export const TRANSLATIONS = {
     "footer.text":             "© 2026 Carles Marin · Apache-2.0 · investigación independiente · la herramienta que cierra el círculo del paper.",
   },
   fr: {
     "hero.title":     "🔬 TAF Agent",
     "hero.tagline":   "Testez <strong>N'IMPORTE QUEL</strong> LLM transformer avant de dépenser du GPU/€.",
     "hero.subtitle":  "Tout le calcul s'exécute localement dans votre navigateur. Gratuit. Illimité. Auditable.",
@@ -636,7 +675,21 @@ export const TRANSLATIONS = {
     "footer.text":             "© 2026 Carles Marin · Apache-2.0 · recherche indépendante · l'outil qui ferme la boucle du paper.",
   },
   zh: {
     "hero.title":     "🔬 TAF Agent",
     "hero.tagline":   "在花费 GPU/$ 之前，测试<strong>任意</strong> Transformer LLM。",
     "hero.subtitle":  "所有计算在您的浏览器本地运行。免费。无限制。可审计。",

     "help.source.body":         "Source code: <a href=\"https://github.com/karlesmarin/tafagent\" target=\"_blank\">github.com/karlesmarin/tafagent</a><br>Paper: <em>Marin 2026 — Predicting How Transformers Attend</em> (<a href=\"https://zenodo.org/records/19826343\" target=\"_blank\">Zenodo</a>; arXiv forthcoming)<br>Dataset: <a href=\"https://huggingface.co/datasets/karlexmarin/taf-attention-decay\" target=\"_blank\">taf-attention-decay</a> — 58 γ-measurements across 32 models (CC-BY-4.0)",
     "footer.text":             "© 2026 Carles Marin · Apache-2.0 · independent research · the tool that closes the loop of the paper.",
+    // §33 v0.4 (sesion 31, 2026-04-30) — new diagnostic functions
+    "v04.title":                  "🆕 v0.4 — New diagnostics (sesion 31)",
+    "v04.arch.label":             "Architectural Concentration",
+    "v04.arch.desc":              "γ_text ≈ γ_Padé − 0.012·n_kv. Cross-panel correlational law (R²=0.30). Caveat: not per-model predictor.",
+    "v04.pdi.label":              "PDI — Padé Deviation Index",
+    "v04.pdi.desc":               "PDI = d_horizon_obs/T_eval. Traffic light: green (≈1), orange (>>1), yellow (<<1), red (Phase B negative).",
+    "v04.4bit.label":             "4-bit Shift Predictor",
+    "v04.4bit.desc":              "MHA: R²(bf16)<0.9 → γ rises; R²>0.99 → γ drops. GQA: precision-robust regardless.",
+    "v04.crit.label":             "Critical Exponents Bundle",
+    "v04.crit.desc":              "ν_c, β_c, η_c (=γ−1, CORRECTED), α_C, γ_susc with AM-GM minimum at γ=1−1/√2≈0.293.",
   },
+  // ────────────────────────────────────────────────────────────────────────
+  // ES — Español
+  // ────────────────────────────────────────────────────────────────────────
   es: {
+    // §33 v0.4 (sesion 31, 2026-04-30) — nuevas funciones diagnósticas
+    "v04.title":                  "🆕 v0.4 — Nuevos diagnósticos (sesion 31)",
+    "v04.arch.label":             "Concentración Arquitectural",
+    "v04.arch.desc":              "γ_text ≈ γ_Padé − 0.012·n_kv. Ley correlacional cross-panel (R²=0.30). Caveat: no es predictor per-model.",
+    "v04.pdi.label":              "PDI — Índice de Desviación de Padé",
+    "v04.pdi.desc":               "PDI = d_horizon_obs/T_eval. Semáforo: verde (≈1), naranja (>>1), amarillo (<<1), rojo (Phase B negativo).",
+    "v04.4bit.label":             "Predictor de Shift 4-bit",
+    "v04.4bit.desc":              "MHA: R²(bf16)<0.9 → γ sube; R²>0.99 → γ baja. GQA: precision-robusto.",
+    "v04.crit.label":             "Bundle de Exponentes Críticos",
+    "v04.crit.desc":              "ν_c, β_c, η_c (=γ−1, CORREGIDO), α_C, γ_susc con mínimo AM-GM en γ=1−1/√2≈0.293.",
     "hero.title":     "🔬 TAF Agent",
     "hero.tagline":   "Prueba <strong>CUALQUIER</strong> LLM transformer antes de gastar GPU/€.",
     "hero.subtitle":  "Todo el cómputo corre localmente en tu navegador. Gratis. Sin límites. Auditable.",
     "footer.text":             "© 2026 Carles Marin · Apache-2.0 · investigación independiente · la herramienta que cierra el círculo del paper.",
   },
+  // ────────────────────────────────────────────────────────────────────────
+  // FR — Français
+  // ────────────────────────────────────────────────────────────────────────
   fr: {
+    // §33 v0.4 (sesion 31, 2026-04-30) — nouvelles fonctions de diagnostic
+    "v04.title":                  "🆕 v0.4 — Nouveaux diagnostics (sesion 31)",
+    "v04.arch.label":             "Concentration Architecturale",
+    "v04.arch.desc":              "γ_text ≈ γ_Padé − 0.012·n_kv. Loi corrélationnelle cross-panel (R²=0.30). Caveat : pas un prédicteur par-modèle.",
+    "v04.pdi.label":              "PDI — Indice de Déviation de Padé",
+    "v04.pdi.desc":               "PDI = d_horizon_obs/T_eval. Feu : vert (≈1), orange (>>1), jaune (<<1), rouge (Phase B négatif).",
+    "v04.4bit.label":             "Prédicteur de Décalage 4-bit",
+    "v04.4bit.desc":              "MHA : R²(bf16)<0.9 → γ monte ; R²>0.99 → γ descend. GQA : précision-robuste.",
+    "v04.crit.label":             "Ensemble d'Exposants Critiques",
+    "v04.crit.desc":              "ν_c, β_c, η_c (=γ−1, CORRIGÉ), α_C, γ_susc avec minimum AM-GM à γ=1−1/√2≈0.293.",
     "hero.title":     "🔬 TAF Agent",
     "hero.tagline":   "Testez <strong>N'IMPORTE QUEL</strong> LLM transformer avant de dépenser du GPU/€.",
     "hero.subtitle":  "Tout le calcul s'exécute localement dans votre navigateur. Gratuit. Illimité. Auditable.",
     "footer.text":             "© 2026 Carles Marin · Apache-2.0 · recherche indépendante · l'outil qui ferme la boucle du paper.",
   },
+  // ────────────────────────────────────────────────────────────────────────
+  // ZH — 中文
+  // ────────────────────────────────────────────────────────────────────────
   zh: {
+    // §33 v0.4 (sesion 31, 2026-04-30) — 新诊断功能
+    "v04.title":                  "🆕 v0.4 — 新诊断 (会话 31)",
+    "v04.arch.label":             "架构集中度",
+    "v04.arch.desc":              "γ_text ≈ γ_Padé − 0.012·n_kv。跨面板相关性定律（R²=0.30）。警告：不是逐模型预测器。",
+    "v04.pdi.label":              "PDI — Padé 偏差指数",
+    "v04.pdi.desc":               "PDI = d_horizon_obs/T_eval。交通灯：绿色（≈1）、橙色（>>1）、黄色（<<1）、红色（B 阶段负值）。",
+    "v04.4bit.label":             "4 位精度移位预测器",
+    "v04.4bit.desc":              "MHA: R²(bf16)<0.9 → γ 上升；R²>0.99 → γ 下降。GQA: 精度稳健。",
+    "v04.crit.label":             "临界指数捆绑",
+    "v04.crit.desc":              "ν_c、β_c、η_c (=γ−1, 已修正)、α_C、γ_susc，AM-GM 最小值在 γ=1−1/√2≈0.293。",
     "hero.title":     "🔬 TAF Agent",
     "hero.tagline":   "在花费 GPU/$ 之前，测试<strong>任意</strong> Transformer LLM。",
     "hero.subtitle":  "所有计算在您的浏览器本地运行。免费。无限制。可审计。",

python/taf_browser.py CHANGED Viewed

@@ -1007,6 +1007,164 @@ def hagedorn_safety_alert(gamma: float) -> dict:
     }
 def bimodal_phase_class(gamma: float) -> str:
     """§32.2 — Bimodal classifier (paper 2 §4 finding F11).

     }
+# ════════════════════════════════════════════════════════════════════════════
+# §33 — Session 31 (2026-04-30) findings — added to TAF v0.4
+# Architectural concentration law, PDI, 4-bit R²-direction rule, critical exponents
+# ════════════════════════════════════════════════════════════════════════════
+def architectural_concentration_predict(gamma_pade_val: float, n_kv: int) -> dict:
+    """§33.1 — Architectural concentration law (paper 2 NEW, session 31).
+
+    Applies the linear panel fit  γ_text ≈ γ_Padé − 0.012·n_kv  and returns
+    the prediction together with its inputs and fixed explanatory strings.
+
+    Reported fit quality: R² = 0.30 cross-panel (n=22) vs Padé alone R²=0.02.
+    IMPORTANT: This is a CORRELATIONAL law, NOT a per-model predictor.
+    Mean per-model |err| = 0.27, WORSE than Padé alone (0.24).
+    Use as a CROSS-PANEL diagnostic, not for individual prediction.
+
+    Args:
+        gamma_pade_val: Padé-predicted γ that the correction is applied to.
+        n_kv: multiplier of the 0.012 coefficient (presumably the number of
+            KV heads — confirm against the caller). Not validated here.
+
+    Returns:
+        dict with keys "gamma_pade" and "n_kv" (inputs echoed back),
+        "k_arch" (the coefficient used), "gamma_text_predicted"
+        (gamma_pade_val − k_arch·n_kv), and the constant strings "caveat"
+        and "interpretation".
+    """
+    k_arch = 0.012  # panel-fit coefficient (n=22), not derived from first principles
+    # NOTE(review): for n_kv >= 0 the correction is never positive, so the
+    # prediction is at most gamma_pade_val; "pushed up" in the interpretation
+    # text below can only mean relative to higher-n_kv models.
+    gamma_predicted = gamma_pade_val - k_arch * n_kv
+    return {
+        "gamma_pade": gamma_pade_val,
+        "n_kv": n_kv,
+        "k_arch": k_arch,
+        "gamma_text_predicted": gamma_predicted,
+        "caveat": "Correlational, not per-model predictor (R²=0.30, mean err 0.27)",
+        "interpretation": (
+            "GQA aggressive (low n_kv) → γ pushed up toward Hagedorn. "
+            "MHA full (n_kv=32) → γ drops below Padé (sink-prone)."
+        ),
+    }
+def padé_deviation_index(theta: float, gamma_obs: float, T_eval: int) -> dict:
+    """§33.2 — PDI Padé Deviation Index (paper 2 NEW, session 31).
+
+    PDI = d_horizon_obs / T_eval = θ(1−γ_obs)√2 / ((1+γ_obs)·T_eval)
+    Identity (D-NEW-1): PDI = 1 ⟺ γ_obs = γ_Padé(θ, T_eval)
+
+    Classification implemented below (checked in this order):
+      PDI < 0:           Phase B (γ_obs > 1, formula sign-flips)    → RED
+      0.5 ≤ PDI ≤ 1.5:   canonical (model matches Padé)             → GREEN
+      PDI > 1.5:         γ_obs < γ_Padé (sink-dominated,
+                         code/instruct shift)                       → ORANGE
+      0 ≤ PDI < 0.5:     γ_obs > γ_Padé but ≤ 1 (over-concentrated) → YELLOW
+    Equivalent log scale: log(PDI) + ΔH_Cardy = 0 (D-DEEP-15).
+
+    Args:
+        theta: θ in the formula above (presumably the RoPE base — confirm).
+        gamma_obs: observed γ.
+        T_eval: evaluation length used as the denominator.
+
+    Returns:
+        dict with keys "PDI", "log_PDI" (None unless PDI > 0), "regime",
+        "traffic_light" and "identity". When gamma_obs == -1.0 a reduced
+        dict with only "PDI" (inf) and "regime" ("singular") is returned.
+
+    Raises:
+        ZeroDivisionError: if T_eval == 0 (no guard below).
+    """
+    # The denominator (1 + γ_obs) vanishes here; short-circuit instead of dividing.
+    # NOTE(review): this early return lacks the "log_PDI"/"traffic_light"/"identity"
+    # keys that the normal return provides — callers must handle both shapes.
+    if gamma_obs == -1.0:
+        return {"PDI": float('inf'), "regime": "singular"}
+    # NOTE(review): T_eval is not validated; T_eval == 0 raises ZeroDivisionError.
+    pdi = theta * (1 - gamma_obs) * math.sqrt(2) / ((1 + gamma_obs) * T_eval)
+    # NOTE(review): with positive theta and T_eval, pdi is also negative for
+    # gamma_obs < -1, which would be labelled "Phase B (γ>1 ...)" as well.
+    if pdi < 0:
+        regime = "Phase B (γ>1, formula sign-flip)"
+        traffic = "🔴 RED — Phase B, NTK extension required"
+    elif 0.5 <= pdi <= 1.5:
+        regime = "canonical (γ_obs ≈ γ_Padé)"
+        traffic = "🟢 GREEN — model matches Padé prediction"
+    elif pdi > 1.5:
+        regime = "γ_obs << γ_Padé (sink-dominated or extreme alignment)"
+        traffic = "🟠 ORANGE — large positive deviation"
+    else:  # 0 <= pdi < 0.5 (pdi == 0 lands here, e.g. gamma_obs == 1)
+        regime = "γ_obs > γ_Padé (over-concentrated in Phase A)"
+        traffic = "🟡 YELLOW — moderate deviation"
+    return {
+        "PDI": pdi,
+        # log is only defined for strictly positive PDI; otherwise report None.
+        "log_PDI": math.log(pdi) if pdi > 0 else None,
+        "regime": regime,
+        "traffic_light": traffic,
+        "identity": "log(PDI) + ΔH_Cardy = 0 (use either, log-inverse)",
+    }
+def precision_shift_predict_4bit(gamma_bf16: float, R2_bf16: float, is_GQA: bool) -> dict:
+    """§33.3 — 4-bit precision shift direction predictor (paper 2 NEW, session 31).
+
+    Empirical n=5 rule: bf16 R² of power-law fit predicts 4-bit shift direction.
+    For MHA models:
+      R²(bf16) < 0.9 (sink-dominated): 4-bit shifts γ UP toward γ_Padé
+      R²(bf16) > 0.99 (clean): 4-bit shifts γ DOWN (introduces noise)
+      0.9 ≤ R² ≤ 0.99: stable (~no shift)
+    For GQA models: precision-robust regardless (|Δγ| < 0.05).
+
+    Args:
+        gamma_bf16: bf16 γ measurement. Accepted for interface symmetry but
+            not read anywhere in this function.
+        R2_bf16: R² of the bf16 power-law fit; only consulted when is_GQA
+            is false.
+        is_GQA: when true, the GQA "stable" answer is returned immediately.
+
+    Returns:
+        dict with "predicted_shift_direction" ("UP", "DOWN" or "stable"),
+        "expected_magnitude" and "reason". The MHA path adds "evidence" and
+        "caveat"; the GQA path adds "recommendation" instead.
+    """
+    # NOTE(review): the two branches return different key sets ("recommendation"
+    # vs "evidence"/"caveat"); consumers should not assume a single schema.
+    if is_GQA:
+        return {
+            "predicted_shift_direction": "stable",
+            "expected_magnitude": "|Δγ| < 0.05",
+            "reason": "GQA KV-sharing distributes attention; little long-tail to perturb",
+            "recommendation": "Either bf16 or 4-bit OK for deployment",
+        }
+    # MHA case — depends on R²
+    # Thresholds are strict: R² exactly 0.9 or exactly 0.99 falls in "stable".
+    if R2_bf16 < 0.9:
+        direction = "UP"
+        magnitude = "+0.3 to +0.8 expected"
+        reason = "Sink-dominated bf16: 4-bit truncates long-tail, reveals Padé prediction"
+    elif R2_bf16 > 0.99:
+        direction = "DOWN"
+        magnitude = "−0.2 to −0.4 expected"
+        reason = "Clean bf16: 4-bit further sparsifies, introduces non-monotonicity"
+    else:
+        direction = "stable"
+        magnitude = "|Δγ| < 0.05"
+        reason = "Borderline R², 4-bit minimal effect"
+    return {
+        "predicted_shift_direction": direction,
+        "expected_magnitude": magnitude,
+        "reason": reason,
+        "evidence": "n=5 paired measurements (DeepSeek/Pythia-1B/Pythia-2.8B/Llama-3/Qwen-7B-Inst)",
+        "caveat": "R²-direction rule is empirical, not formally derived",
+    }
+def critical_exponents_bundle(gamma: float) -> dict:
+    """§33.4 — Critical exponents bundle (paper 2 NEW, session 31 + GAME-O).
+
+    Returns ν_c (correlation length), β_c (order parameter), η_c (anomalous dim),
+    α_C (specific heat), γ_susc (susceptibility) as functions of γ:
+      ν_c    = 1/(1−γ)
+      β_c    = γ−1
+      η_c    = γ−1
+      α_C    = 2 − 1/(1−γ)
+      γ_susc = 1/(1−γ) + 2(1−γ)
+    Hyperscaling consistent (Rushbrooke + Josephson, d=1).
+    NEW IDENTITY (GAME-P recursive):
+      γ_susc(γ) = 1/(1−γ) + 2(1−γ) ≥ 2√2 (AM-GM bound)
+      Minimum at γ = 1 − 1/√2 ≈ 0.293
+      Equals c_central=3 at γ=0 AND γ=1/2
+
+    Args:
+        gamma: the exponent γ; formulas are evaluated only for gamma < 1.
+
+    Returns:
+        For gamma >= 1, a reduced dict {"regime", "exponents"} with fixed
+        strings and no numeric values. Otherwise a dict with the five
+        exponents, explanatory notes, the "AM_GM_bound" sub-dict and a
+        "hyperscaling_check" sub-dict.
+    """
+    # 1/(1−γ) is singular at γ = 1; report the regime instead of dividing.
+    if gamma >= 1:
+        return {"regime": "Hagedorn or beyond", "exponents": "diverge"}
+    nu_c = 1 / (1 - gamma)
+    beta_c = gamma - 1
+    eta_c = gamma - 1  # CORRECTED from paper 1's η=2γ (Lévy mapping consistent with hyperscaling)
+    alpha_C = 2 - 1 / (1 - gamma)
+    gamma_susc = 1 / (1 - gamma) + 2 * (1 - gamma)
+    # AM-GM bound check
+    # The two terms of gamma_susc have constant product 2, so their sum is
+    # at least 2√2, reached when they are equal, i.e. (1−γ)² = 1/2.
+    # These two values are constants; they do not depend on the gamma argument.
+    gamma_min = 1 - 1 / math.sqrt(2)
+    gamma_susc_min = 2 * math.sqrt(2)
+    return {
+        "nu_correlation_length": nu_c,
+        "beta_order_param": beta_c,
+        "eta_anomalous_dim": eta_c,
+        "eta_note": "η_c = γ−1 (Lévy-derived, hyperscaling-consistent). Paper 1 claim η=2γ is INCORRECT.",
+        "alpha_specific_heat": alpha_C,
+        "gamma_susceptibility": gamma_susc,
+        "c_central_at_gamma_0": 3,  # γ_susc(γ=0) = 3 = c_central
+        "AM_GM_bound": {
+            "min_gamma_susc": gamma_susc_min,
+            "min_at_gamma": gamma_min,
+            "interpretation": (
+                f"γ_susc has UNIVERSAL minimum {gamma_susc_min:.3f} at γ = {gamma_min:.4f} "
+                "(AM-GM with ab=2 product constant)"
+            ),
+        },
+        # With the formulas above the sum cancels algebraically to exactly 2,
+        # so this entry differs from "expected" only by float rounding.
+        "hyperscaling_check": {
+            "Rushbrooke (α + 2β + γ_susc = 2)": alpha_C + 2 * beta_c + gamma_susc,
+            "expected": 2,
+        },
+        "warning_paper1_eta": "Paper 1's η_c = 2γ is INCORRECT. Use η_c = γ-1 (this function).",
+    }
 def bimodal_phase_class(gamma: float) -> str:
     """§32.2 — Bimodal classifier (paper 2 §4 finding F11).