karlexmarin Claude Opus 4.7 (1M context) committed on
Commit
5d885d7
·
1 Parent(s): 959e23c

v0.4: add 4 diagnostic functions from sesion 31

Browse files

- architectural_concentration_predict: γ_text = γ_Padé - 0.012·n_kv (R²=0.30 cross-panel)
- padé_deviation_index (PDI): d_horizon_obs/T_eval ratio with traffic light
- precision_shift_predict_4bit: R²(bf16) direction rule for 4-bit shifts
- critical_exponents_bundle: ν_c, β_c, η_c=γ-1 (CORRECTED), α_C, γ_susc
+ AM-GM bound γ_susc ≥ 2√2 at γ=1-1/√2

i18n EN/ES/FR/ZH for all 4 new diagnostic labels and descriptions.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (2) hide show
  1. js/i18n.js +53 -0
  2. python/taf_browser.py +158 -0
js/i18n.js CHANGED
@@ -217,9 +217,34 @@ export const TRANSLATIONS = {
217
  "help.source.body": "Source code: <a href=\"https://github.com/karlesmarin/tafagent\" target=\"_blank\">github.com/karlesmarin/tafagent</a><br>Paper: <em>Marin 2026 — Predicting How Transformers Attend</em> (<a href=\"https://zenodo.org/records/19826343\" target=\"_blank\">Zenodo</a>; arXiv forthcoming)<br>Dataset: <a href=\"https://huggingface.co/datasets/karlexmarin/taf-attention-decay\" target=\"_blank\">taf-attention-decay</a> — 58 γ-measurements across 32 models (CC-BY-4.0)",
218
 
219
  "footer.text": "© 2026 Carles Marin · Apache-2.0 · independent research · the tool that closes the loop of the paper.",
 
 
 
 
 
 
 
 
 
 
 
220
  },
221
 
 
 
 
222
  es: {
 
 
 
 
 
 
 
 
 
 
 
223
  "hero.title": "🔬 TAF Agent",
224
  "hero.tagline": "Prueba <strong>CUALQUIER</strong> LLM transformer antes de gastar GPU/€.",
225
  "hero.subtitle": "Todo el cómputo corre localmente en tu navegador. Gratis. Sin límites. Auditable.",
@@ -428,7 +453,21 @@ export const TRANSLATIONS = {
428
  "footer.text": "© 2026 Carles Marin · Apache-2.0 · investigación independiente · la herramienta que cierra el círculo del paper.",
429
  },
430
 
 
 
 
431
  fr: {
 
 
 
 
 
 
 
 
 
 
 
432
  "hero.title": "🔬 TAF Agent",
433
  "hero.tagline": "Testez <strong>N'IMPORTE QUEL</strong> LLM transformer avant de dépenser du GPU/€.",
434
  "hero.subtitle": "Tout le calcul s'exécute localement dans votre navigateur. Gratuit. Illimité. Auditable.",
@@ -636,7 +675,21 @@ export const TRANSLATIONS = {
636
  "footer.text": "© 2026 Carles Marin · Apache-2.0 · recherche indépendante · l'outil qui ferme la boucle du paper.",
637
  },
638
 
 
 
 
639
  zh: {
 
 
 
 
 
 
 
 
 
 
 
640
  "hero.title": "🔬 TAF Agent",
641
  "hero.tagline": "在花费 GPU/$ 之前,测试<strong>任意</strong> Transformer LLM。",
642
  "hero.subtitle": "所有计算在您的浏览器本地运行。免费。无限制。可审计。",
 
217
  "help.source.body": "Source code: <a href=\"https://github.com/karlesmarin/tafagent\" target=\"_blank\">github.com/karlesmarin/tafagent</a><br>Paper: <em>Marin 2026 — Predicting How Transformers Attend</em> (<a href=\"https://zenodo.org/records/19826343\" target=\"_blank\">Zenodo</a>; arXiv forthcoming)<br>Dataset: <a href=\"https://huggingface.co/datasets/karlexmarin/taf-attention-decay\" target=\"_blank\">taf-attention-decay</a> — 58 γ-measurements across 32 models (CC-BY-4.0)",
218
 
219
  "footer.text": "© 2026 Carles Marin · Apache-2.0 · independent research · the tool that closes the loop of the paper.",
220
+
221
+ // §33 v0.4 (sesion 31, 2026-04-30) — new diagnostic functions
222
+ "v04.title": "🆕 v0.4 — New diagnostics (sesion 31)",
223
+ "v04.arch.label": "Architectural Concentration",
224
+ "v04.arch.desc": "γ_text ≈ γ_Padé − 0.012·n_kv. Cross-panel correlational law (R²=0.30). Caveat: not per-model predictor.",
225
+ "v04.pdi.label": "PDI — Padé Deviation Index",
226
+ "v04.pdi.desc": "PDI = d_horizon_obs/T_eval. Traffic light: green (≈1), orange (>>1), yellow (<<1), red (Phase B negative).",
227
+ "v04.4bit.label": "4-bit Shift Predictor",
228
+ "v04.4bit.desc": "MHA: R²(bf16)<0.9 → γ rises; R²>0.99 → γ drops. GQA: precision-robust regardless.",
229
+ "v04.crit.label": "Critical Exponents Bundle",
230
+ "v04.crit.desc": "ν_c, β_c, η_c (=γ−1, CORRECTED), α_C, γ_susc with AM-GM minimum at γ=1−1/√2≈0.293.",
231
  },
232
 
233
+ // ────────────────────────────────────────────────────────────────────────
234
+ // ES — Español
235
+ // ────────────────────────────────────────────────────────────────────────
236
  es: {
237
+ // §33 v0.4 (sesion 31, 2026-04-30) — nuevas funciones diagnósticas
238
+ "v04.title": "🆕 v0.4 — Nuevos diagnósticos (sesion 31)",
239
+ "v04.arch.label": "Concentración Arquitectural",
240
+ "v04.arch.desc": "γ_text ≈ γ_Padé − 0.012·n_kv. Ley correlacional cross-panel (R²=0.30). Caveat: no es predictor per-model.",
241
+ "v04.pdi.label": "PDI — Índice de Desviación de Padé",
242
+ "v04.pdi.desc": "PDI = d_horizon_obs/T_eval. Semáforo: verde (≈1), naranja (>>1), amarillo (<<1), rojo (Phase B negativo).",
243
+ "v04.4bit.label": "Predictor de Shift 4-bit",
244
+ "v04.4bit.desc": "MHA: R²(bf16)<0.9 → γ sube; R²>0.99 → γ baja. GQA: precision-robusto.",
245
+ "v04.crit.label": "Bundle de Exponentes Críticos",
246
+ "v04.crit.desc": "ν_c, β_c, η_c (=γ−1, CORREGIDO), α_C, γ_susc con mínimo AM-GM en γ=1−1/√2≈0.293.",
247
+
248
  "hero.title": "🔬 TAF Agent",
249
  "hero.tagline": "Prueba <strong>CUALQUIER</strong> LLM transformer antes de gastar GPU/€.",
250
  "hero.subtitle": "Todo el cómputo corre localmente en tu navegador. Gratis. Sin límites. Auditable.",
 
453
  "footer.text": "© 2026 Carles Marin · Apache-2.0 · investigación independiente · la herramienta que cierra el círculo del paper.",
454
  },
455
 
456
+ // ────────────────────────────────────────────────────────────────────────
457
+ // FR — Français
458
+ // ────────────────────────────────────────────────────────────────────────
459
  fr: {
460
+ // §33 v0.4 (sesion 31, 2026-04-30) — nouvelles fonctions de diagnostic
461
+ "v04.title": "🆕 v0.4 — Nouveaux diagnostics (sesion 31)",
462
+ "v04.arch.label": "Concentration Architecturale",
463
+ "v04.arch.desc": "γ_text ≈ γ_Padé − 0.012·n_kv. Loi corrélationnelle cross-panel (R²=0.30). Caveat : pas un prédicteur par-modèle.",
464
+ "v04.pdi.label": "PDI — Indice de Déviation de Padé",
465
+ "v04.pdi.desc": "PDI = d_horizon_obs/T_eval. Feu : vert (≈1), orange (>>1), jaune (<<1), rouge (Phase B négatif).",
466
+ "v04.4bit.label": "Prédicteur de Décalage 4-bit",
467
+ "v04.4bit.desc": "MHA : R²(bf16)<0.9 → γ monte ; R²>0.99 → γ descend. GQA : précision-robuste.",
468
+ "v04.crit.label": "Ensemble d'Exposants Critiques",
469
+ "v04.crit.desc": "ν_c, β_c, η_c (=γ−1, CORRIGÉ), α_C, γ_susc avec minimum AM-GM à γ=1−1/√2≈0.293.",
470
+
471
  "hero.title": "🔬 TAF Agent",
472
  "hero.tagline": "Testez <strong>N'IMPORTE QUEL</strong> LLM transformer avant de dépenser du GPU/€.",
473
  "hero.subtitle": "Tout le calcul s'exécute localement dans votre navigateur. Gratuit. Illimité. Auditable.",
 
675
  "footer.text": "© 2026 Carles Marin · Apache-2.0 · recherche indépendante · l'outil qui ferme la boucle du paper.",
676
  },
677
 
678
+ // ────────────────────────────────────────────────────────────────────────
679
+ // ZH — 中文
680
+ // ────────────────────────────────────────────────────────────────────────
681
  zh: {
682
+ // §33 v0.4 (sesion 31, 2026-04-30) — 新诊断功能
683
+ "v04.title": "🆕 v0.4 — 新诊断 (会话 31)",
684
+ "v04.arch.label": "架构集中度",
685
+ "v04.arch.desc": "γ_text ≈ γ_Padé − 0.012·n_kv。跨面板相关性定律(R²=0.30)。警告:不是逐模型预测器。",
686
+ "v04.pdi.label": "PDI — Padé 偏差指数",
687
+ "v04.pdi.desc": "PDI = d_horizon_obs/T_eval。交通灯:绿色(≈1)、橙色(>>1)、黄色(<<1)、红色(B 阶段负值)。",
688
+ "v04.4bit.label": "4 位精度移位预测器",
689
+ "v04.4bit.desc": "MHA: R²(bf16)<0.9 → γ 上升;R²>0.99 → γ 下降。GQA: 精度稳健。",
690
+ "v04.crit.label": "临界指数捆绑",
691
+ "v04.crit.desc": "ν_c、β_c、η_c (=γ−1, 已修正)、α_C、γ_susc,AM-GM 最小值在 γ=1−1/√2≈0.293。",
692
+
693
  "hero.title": "🔬 TAF Agent",
694
  "hero.tagline": "在花费 GPU/$ 之前,测试<strong>任意</strong> Transformer LLM。",
695
  "hero.subtitle": "所有计算在您的浏览器本地运行。免费。无限制。可审计。",
python/taf_browser.py CHANGED
@@ -1007,6 +1007,164 @@ def hagedorn_safety_alert(gamma: float) -> dict:
1007
  }
1008
 
1009
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1010
  def bimodal_phase_class(gamma: float) -> str:
1011
  """§32.2 — Bimodal classifier (paper 2 §4 finding F11).
1012
 
 
1007
  }
1008
 
1009
 
1010
+ # ════════════════════════════════════════════════════════════════════════════
1011
+ # §33 — Sesion 31 (2026-04-30) findings — added to TAF v0.4
1012
+ # Architectural concentration law, PDI, 4-bit R²-direction rule, critical exponents
1013
+ # ════════════════════════════════════════════════════════════════════════════
1014
+
1015
+ def architectural_concentration_predict(gamma_pade_val: float, n_kv: int) -> dict:
1016
+ """§33.1 — Architectural concentration law (paper 2 NEW, sesion 31).
1017
+
1018
+ γ_text ≈ γ_Padé − 0.012·n_kv
1019
+ R² = 0.30 cross-panel (n=22) vs Padé alone R²=0.02.
1020
+
1021
+ IMPORTANT: This is a CORRELATIONAL law, NOT a per-model predictor.
1022
+ Mean per-model |err| = 0.27, WORSE than Padé alone (0.24).
1023
+ Use as CROSS-PANEL diagnostic, not individual prediction.
1024
+ """
1025
+ k_arch = 0.012 # panel-fit coefficient (n=22), not derived from first principles
1026
+ gamma_predicted = gamma_pade_val - k_arch * n_kv
1027
+ return {
1028
+ "gamma_pade": gamma_pade_val,
1029
+ "n_kv": n_kv,
1030
+ "k_arch": k_arch,
1031
+ "gamma_text_predicted": gamma_predicted,
1032
+ "caveat": "Correlational, not per-model predictor (R²=0.30, mean err 0.27)",
1033
+ "interpretation": (
1034
+ "GQA aggressive (low n_kv) → γ pushed up toward Hagedorn. "
1035
+ "MHA full (n_kv=32) → γ drops below Padé (sink-prone)."
1036
+ ),
1037
+ }
1038
+
1039
+
1040
+ def padé_deviation_index(theta: float, gamma_obs: float, T_eval: int) -> dict:
1041
+ """§33.2 — PDI Padé Deviation Index (paper 2 NEW, sesion 31).
1042
+
1043
+ PDI = d_horizon_obs / T_eval = θ(1−γ_obs)√2 / ((1+γ_obs)·T_eval)
1044
+
1045
+ Identity (D-NEW-1): PDI = 1 ⟺ γ_obs = γ_Padé(θ, T_eval)
1046
+ Diagnostic value:
1047
+ PDI ≈ 1: canonical (model matches Padé)
1048
+ PDI > 1.5: γ_obs < γ_Padé (sink-dominated, code/instruct shift)
1049
+ 0 ≤ PDI < 0.5: γ_obs > γ_Padé but ≤ 1 (over-concentrated)
1050
+ PDI < 0: γ_obs > 1 (Phase B, formula sign-flips)
1051
+
1052
+ Equivalent log scale: log(PDI) + ΔH_Cardy = 0 (D-DEEP-15).
1053
+ """
1054
+ if gamma_obs == -1.0:
1055
+ return {"PDI": float('inf'), "regime": "singular"}
1056
+ pdi = theta * (1 - gamma_obs) * math.sqrt(2) / ((1 + gamma_obs) * T_eval)
1057
+ if pdi < 0:
1058
+ regime = "Phase B (γ>1, formula sign-flip)"
1059
+ traffic = "🔴 RED — Phase B, NTK extension required"
1060
+ elif 0.5 <= pdi <= 1.5:
1061
+ regime = "canonical (γ_obs ≈ γ_Padé)"
1062
+ traffic = "🟢 GREEN — model matches Padé prediction"
1063
+ elif pdi > 1.5:
1064
+ regime = "γ_obs << γ_Padé (sink-dominated or extreme alignment)"
1065
+ traffic = "🟠 ORANGE — large positive deviation"
1066
+ else: # 0 < pdi < 0.5
1067
+ regime = "γ_obs > γ_Padé (over-concentrated in Phase A)"
1068
+ traffic = "🟡 YELLOW — moderate deviation"
1069
+ return {
1070
+ "PDI": pdi,
1071
+ "log_PDI": math.log(pdi) if pdi > 0 else None,
1072
+ "regime": regime,
1073
+ "traffic_light": traffic,
1074
+ "identity": "log(PDI) + ΔH_Cardy = 0 (use either, log-inverse)",
1075
+ }
1076
+
1077
+
1078
+ def precision_shift_predict_4bit(gamma_bf16: float, R2_bf16: float, is_GQA: bool) -> dict:
1079
+ """§33.3 — 4-bit precision shift direction predictor (paper 2 NEW, sesion 31).
1080
+
1081
+ Empirical n=5 rule: bf16 R² of power-law fit predicts 4-bit shift direction.
1082
+
1083
+ For MHA models:
1084
+ R²(bf16) < 0.9 (sink-dominated): 4-bit shifts γ UP toward γ_Padé
1085
+ R²(bf16) > 0.99 (clean): 4-bit shifts γ DOWN (introduces noise)
1086
+ 0.9 ≤ R² ≤ 0.99: stable (~no shift)
1087
+
1088
+ For GQA models: precision-robust regardless (|Δγ| < 0.05).
1089
+ """
1090
+ if is_GQA:
1091
+ return {
1092
+ "predicted_shift_direction": "stable",
1093
+ "expected_magnitude": "|Δγ| < 0.05",
1094
+ "reason": "GQA KV-sharing distributes attention; little long-tail to perturb",
1095
+ "recommendation": "Either bf16 or 4-bit OK for deployment",
1096
+ }
1097
+ # MHA case — depends on R²
1098
+ if R2_bf16 < 0.9:
1099
+ direction = "UP"
1100
+ magnitude = "+0.3 to +0.8 expected"
1101
+ reason = "Sink-dominated bf16: 4-bit truncates long-tail, reveals Padé prediction"
1102
+ elif R2_bf16 > 0.99:
1103
+ direction = "DOWN"
1104
+ magnitude = "−0.2 to −0.4 expected"
1105
+ reason = "Clean bf16: 4-bit further sparsifies, introduces non-monotonicity"
1106
+ else:
1107
+ direction = "stable"
1108
+ magnitude = "|Δγ| < 0.05"
1109
+ reason = "Borderline R², 4-bit minimal effect"
1110
+ return {
1111
+ "predicted_shift_direction": direction,
1112
+ "expected_magnitude": magnitude,
1113
+ "reason": reason,
1114
+ "evidence": "n=5 paired measurements (DeepSeek/Pythia-1B/Pythia-2.8B/Llama-3/Qwen-7B-Inst)",
1115
+ "caveat": "R²-direction rule is empirical, not formally derived",
1116
+ }
1117
+
1118
+
1119
+ def critical_exponents_bundle(gamma: float) -> dict:
1120
+ """§33.4 — Critical exponents bundle (paper 2 NEW, sesion 31 + GAME-O).
1121
+
1122
+ Returns ν_c (correlation length), β_c (order parameter), η_c (anomalous dim),
1123
+ α_C (specific heat), γ_susc (susceptibility) as functions of γ.
1124
+
1125
+ Hyperscaling consistent (Rushbrooke + Josephson, d=1).
1126
+
1127
+ NEW IDENTITY (GAME-P recursive):
1128
+ γ_susc(γ) = 1/(1−γ) + 2(1−γ) ≥ 2√2 (AM-GM bound)
1129
+ Minimum at γ = 1 − 1/√2 ≈ 0.293
1130
+ Equals c_central=3 at γ=0 AND γ=1/2
1131
+ """
1132
+ if gamma >= 1:
1133
+ return {"regime": "Hagedorn or beyond", "exponents": "diverge"}
1134
+ nu_c = 1 / (1 - gamma)
1135
+ beta_c = gamma - 1
1136
+ eta_c = gamma - 1 # CORRECTED from paper 1's η=2γ (Lévy mapping consistent with hyperscaling)
1137
+ alpha_C = 2 - 1 / (1 - gamma)
1138
+ gamma_susc = 1 / (1 - gamma) + 2 * (1 - gamma)
1139
+
1140
+ # AM-GM bound check
1141
+ gamma_min = 1 - 1 / math.sqrt(2)
1142
+ gamma_susc_min = 2 * math.sqrt(2)
1143
+
1144
+ return {
1145
+ "nu_correlation_length": nu_c,
1146
+ "beta_order_param": beta_c,
1147
+ "eta_anomalous_dim": eta_c,
1148
+ "eta_note": "η_c = γ−1 (Lévy-derived, hyperscaling-consistent). Paper 1 claim η=2γ is INCORRECT.",
1149
+ "alpha_specific_heat": alpha_C,
1150
+ "gamma_susceptibility": gamma_susc,
1151
+ "c_central_at_gamma_0": 3, # γ_susc(γ=0) = 3 = c_central
1152
+ "AM_GM_bound": {
1153
+ "min_gamma_susc": gamma_susc_min,
1154
+ "min_at_gamma": gamma_min,
1155
+ "interpretation": (
1156
+ f"γ_susc has UNIVERSAL minimum {gamma_susc_min:.3f} at γ = {gamma_min:.4f} "
1157
+ "(AM-GM with ab=2 product constant)"
1158
+ ),
1159
+ },
1160
+ "hyperscaling_check": {
1161
+ "Rushbrooke (α + 2β + γ_susc = 2)": alpha_C + 2 * beta_c + gamma_susc,
1162
+ "expected": 2,
1163
+ },
1164
+ "warning_paper1_eta": "Paper 1's η_c = 2γ is INCORRECT. Use η_c = γ-1 (this function).",
1165
+ }
1166
+
1167
+
1168
  def bimodal_phase_class(gamma: float) -> str:
1169
  """§32.2 — Bimodal classifier (paper 2 §4 finding F11).
1170