Spaces:
Running
fix: 6 formula corrections in TAF Agent (audit-driven)
Browse files
Audit via Sócrates v0.1 found 6 issues in cli/diagnose_model.py and python/taf_browser.py.
CRITICAL bugs (wrong recommendations to users):
1. D_f_closed: Phase B (γ>1) clamped to N when truth is ~3% of N.
Affected LLaMA-2/3, Gemma, Mistral, Qwen2.5 — all returned ~all-tokens
when paper says compress to ~50-100 tokens.
2. D_f_closed: Hagedorn buffer |γ-1|<0.01 used N·f^(1/log N) instead
of N^f, giving ~2× wrong values for models near γ=1.
Replaced both with a discrete-truth implementation: the smallest D such that
∑_{d=1}^D d^{-γ} / ∑_{d=1}^N d^{-γ} ≥ f. The paper's "exact continuous
formula" is actually a continuum-integral approximation that deviates by
5–57% from the discrete sum (worse for higher γ).
MEDIUM:
3. partition_Z(γ=1, N) used log(N+0.5), missing Euler-Mascheroni γ_E ≈
0.577 — ~7% underestimate of H_N. Now log(N) + γ_E.
4. free_energy_F returned -log(Z) (the β·F convention); it now returns -log(Z)/γ
(Helmholtz F, consistent with the thermodynamic identity F = U − TS).
LOW:
5. γ_pred used obsolete C/lnθ heuristic; now uses γ_Padé(θ, T_eval)
matching paper §3.3.
6. df_window had dead code `if γ>=1: return f*N` (already excluded by
the [0.65, 0.85] guard); also wrong if it ever ran. Removed.
Tests: tests/test_taf_formulas.py — 19/19 pass, including:
- boundary cases γ ∈ {0.99, 1.01, 1.026, 1.046, 1.5}
- thermodynamic identity S = γ(U-F)
- C_V converging to (logN)²/12 (paper §5.2 erratum)
- θ_design ∘ γ_Padé = id
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
- .gitignore +0 -0
- cli/diagnose_model.py +71 -8
- python/taf_browser.py +19 -5
- tests/test_taf_formulas.py +243 -0
|
Binary files a/.gitignore and b/.gitignore differ
|
|
|
|
@@ -70,9 +70,18 @@ OUTPUT_DIR = Path("./diagnose_results")
|
|
| 70 |
|
| 71 |
# ── Thermodynamic functions ────────────────────────────────────────────────────
|
| 72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
def partition_Z(gamma: float, N: int) -> float:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
if abs(gamma - 1.0) < 1e-5:
|
| 75 |
-
return math.log(N + 0.5)
|
| 76 |
return (N ** (1 - gamma) - 1) / (1 - gamma) + 1
|
| 77 |
|
| 78 |
|
|
@@ -93,7 +102,13 @@ def entropy_S(gamma: float, N: int) -> float:
|
|
| 93 |
|
| 94 |
|
| 95 |
def free_energy_F(gamma: float, N: int) -> float:
|
| 96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
|
| 98 |
|
| 99 |
def heat_capacity_Cv(gamma: float, N: int, delta: float = 1e-4) -> float:
|
|
@@ -104,9 +119,51 @@ def heat_capacity_Cv(gamma: float, N: int, delta: float = 1e-4) -> float:
|
|
| 104 |
|
| 105 |
|
| 106 |
def D_f_closed(gamma: float, f: float, N: int) -> int:
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
|
| 112 |
def delta_H(theta: float, Df: int, N: int) -> float:
|
|
@@ -312,8 +369,14 @@ def run_diagnostic(args) -> dict:
|
|
| 312 |
dH90 = delta_H(theta_nom, D90, N)
|
| 313 |
theta_eff = theta_eff_pade(theta_nom, float(N))
|
| 314 |
|
| 315 |
-
# Theoretical
|
| 316 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 317 |
|
| 318 |
# Attention grammar KL
|
| 319 |
kl_ag = grammar_kl(attn_by_d, gamma, log_A)
|
|
@@ -328,7 +391,7 @@ def run_diagnostic(args) -> dict:
|
|
| 328 |
print(f" γ (gamma) = {gamma:.4f} [R²={R2:.4f}]")
|
| 329 |
if gamma_pred is not None:
|
| 330 |
delta_g = gamma - gamma_pred
|
| 331 |
-
print(f" γ
|
| 332 |
print(f" Phase : {phase}")
|
| 333 |
print(f" T_attn = 1/γ = {T_attn:.4f}")
|
| 334 |
print()
|
|
|
|
| 70 |
|
| 71 |
# ── Thermodynamic functions ────────────────────────────────────────────────────
|
| 72 |
|
| 73 |
+
# Euler-Mascheroni constant γ_E, used by partition_Z in the harmonic-number
# asymptotic H_N ≈ log N + γ_E at γ = 1.
EULER_GAMMA = 0.5772156649015329


def partition_Z(gamma: float, N: int) -> float:
    """Approximate the partition function Z(γ, N) = Σ_{d=1}^{N} d^{-γ}.

    Args:
        gamma: Power-law exponent γ.
        N: Number of terms in the sum.

    Returns:
        * ``0.0`` when ``N <= 0`` (the sum is empty).
        * ``log(N) + γ_E`` when γ is within 1e-5 of 1 (leading terms of the
          harmonic-number asymptotic H_N ~ log N + γ_E + 1/(2N) − ...).
        * ``(N^(1−γ) − 1)/(1−γ) + 1`` otherwise (integral approximation plus
          the d = 1 boundary term).

    NOTE(review): the two non-empty branches do not meet at γ = 1. The γ≠1
    expression tends to ``log N + 1`` as γ → 1, so the value jumps by
    ``1 − γ_E ≈ 0.42`` at ``|γ − 1| = 1e-5``. Left unchanged here because
    other functions may depend on the current values.
    """
    if N <= 0:
        # Empty sum. Without this guard math.log raises for γ = 1, and
        # ``0 ** negative`` raises ZeroDivisionError for γ > 1.
        return 0.0
    if abs(gamma - 1.0) < 1e-5:
        return math.log(N) + EULER_GAMMA
    return (N ** (1 - gamma) - 1) / (1 - gamma) + 1
|
| 86 |
|
| 87 |
|
|
|
|
| 102 |
|
| 103 |
|
| 104 |
def free_energy_F(gamma: float, N: int) -> float:
    """Return the Helmholtz free energy F = -T·log(Z) with T_attn = 1/γ.

    Z is taken from ``partition_Z(gamma, N)`` and floored at 1e-30 before
    the logarithm; γ is floored at 1e-9 before the division. The result is
    therefore ``-log(Z) / γ`` (not the β·F quantity ``-log(Z)``).
    """
    z_floor, gamma_floor = 1e-30, 1e-9
    log_z = math.log(max(partition_Z(gamma, N), z_floor))
    return -log_z / max(gamma, gamma_floor)
|
| 112 |
|
| 113 |
|
| 114 |
def heat_capacity_Cv(gamma: float, N: int, delta: float = 1e-4) -> float:
|
|
|
|
| 119 |
|
| 120 |
|
| 121 |
def D_f_closed(gamma: float, f: float, N: int) -> int:
    """KV compression window computed from the exact discrete sum.

    Returns the smallest D such that
    ∑_{d=1}^D d^{-γ} ≥ f · ∑_{d=1}^N d^{-γ}.

    The closed form D_f = [(1−f) + f·N^(1−γ)]^{1/(1−γ)} (with N^f as its
    γ=1 limit) is a continuum-integral approximation of this sum. It is
    only used, via ``_D_f_closed_continuum``, if the discrete total comes
    out non-positive or non-finite.

    Args:
        gamma: Power-law exponent γ of the attention weights d^{-γ}.
        f: Fraction of the total weight the window must capture. Values
            ≤ 0 yield 1; values the cumulative sum never reaches yield N.
        N: Context length (number of distances summed). Cost is O(N) time
            and memory.

    Returns:
        The window size D. Returns 1 when ``N <= 0`` and N when ``gamma``
        is not strictly positive (including NaN).
    """
    if N <= 0:
        return 1
    if not (0.0 < gamma):
        return N  # ill-defined exponent; N is the safe upper bound

    weights = [d ** (-gamma) for d in range(1, N + 1)]
    total = sum(weights)
    if total <= 0 or not math.isfinite(total):
        # Numerical edge case: fall back to the continuum closed form.
        return _D_f_closed_continuum(gamma, f, N)

    target = f * total
    cum = 0.0
    for d, w in enumerate(weights, start=1):
        cum += w
        if cum >= target:
            return d
    # Reached only when rounding keeps the running sum just below target
    # (or f is NaN / larger than 1).
    return N


def _D_f_closed_continuum(gamma: float, f: float, N: int) -> int:
    """Continuum closed form (paper Theorem 7.1), kept as a fallback.

    Evaluates D_f = [(1−f) + f·N^(1−γ)]^{1/(1−γ)}, or N^f when γ is within
    1e-9 of 1, and clamps the rounded result to ``[1, N]``.

    Returns 1 when ``N <= 0`` (matching ``D_f_closed``) or when the base of
    the power is non-positive; returns N when the power overflows, is
    undefined, or is not finite.
    """
    if N <= 0:
        # Also prevents ``0 ** negative`` (ZeroDivisionError) below.
        return 1
    try:
        if abs(gamma - 1.0) < 1e-9:
            d_f = N ** f
        else:
            one_minus_g = 1.0 - gamma
            # Kept inside the try: this power can overflow for large N.
            base = (1 - f) + f * (N ** one_minus_g)
            if base <= 0:
                return 1
            d_f = base ** (1.0 / one_minus_g)
    except (OverflowError, ValueError):
        return N
    if not math.isfinite(d_f):
        return N
    return max(1, min(N, round(d_f)))
|
| 167 |
|
| 168 |
|
| 169 |
def delta_H(theta: float, Df: int, N: int) -> float:
|
|
|
|
| 369 |
dH90 = delta_H(theta_nom, D90, N)
|
| 370 |
theta_eff = theta_eff_pade(theta_nom, float(N))
|
| 371 |
|
| 372 |
+
# Theoretical γ prediction — γ_Padé(θ, T_eval) (paper §3.3, supersedes
|
| 373 |
+
# the earlier shorthand γ ≈ C/lnθ which assumed T = 10000).
|
| 374 |
+
if theta_nom > 0:
|
| 375 |
+
T_for_pred = max(distances) if distances else N # use largest measured T
|
| 376 |
+
z_sqrt2 = T_for_pred * math.sqrt(2)
|
| 377 |
+
gamma_pred = (2 * theta_nom - z_sqrt2) / (2 * theta_nom + z_sqrt2)
|
| 378 |
+
else:
|
| 379 |
+
gamma_pred = None
|
| 380 |
|
| 381 |
# Attention grammar KL
|
| 382 |
kl_ag = grammar_kl(attn_by_d, gamma, log_A)
|
|
|
|
| 391 |
print(f" γ (gamma) = {gamma:.4f} [R²={R2:.4f}]")
|
| 392 |
if gamma_pred is not None:
|
| 393 |
delta_g = gamma - gamma_pred
|
| 394 |
+
print(f" γ_Padé(θ,T) = {gamma_pred:.4f} Δγ = {delta_g:+.4f}")
|
| 395 |
print(f" Phase : {phase}")
|
| 396 |
print(f" T_attn = 1/γ = {T_attn:.4f}")
|
| 397 |
print()
|
|
@@ -77,13 +77,27 @@ def alpha_opt(gamma_target: float, T_eval: int, theta_nominal: float) -> float:
|
|
| 77 |
|
| 78 |
|
| 79 |
def df_window(gamma: float, N: int, f: float = 0.90):
|
| 80 |
-
"""§26.7 — KV compression window
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
if not (0.65 <= gamma <= 0.85):
|
| 82 |
return None
|
| 83 |
-
if
|
| 84 |
-
return
|
| 85 |
-
|
| 86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
|
| 89 |
def kv_soft_decay_regime(theta: float, gamma: float, T_train: int) -> str:
|
|
|
|
| 77 |
|
| 78 |
|
| 79 |
def df_window(gamma: float, N: int, f: float = 0.90):
    """§26.7 — KV compression window from the discrete cumulative sum.

    Outside the calibrated zone γ ∈ [0.65, 0.85] the function returns None.
    Inside it, the result is the smallest D for which
    ∑_{d=1}^D d^{-γ} ≥ f · ∑_{d=1}^N d^{-γ}; a non-positive N yields 1, and
    N is returned if the running sum never reaches the target.
    """
    in_calibrated_zone = 0.65 <= gamma <= 0.85
    if not in_calibrated_zone:
        return None
    if N <= 0:
        return 1

    decay = [k ** (-gamma) for k in range(1, N + 1)]
    threshold = f * sum(decay)

    running = 0.0
    for idx in range(N):
        running += decay[idx]
        if running >= threshold:
            return idx + 1
    return N
|
| 101 |
|
| 102 |
|
| 103 |
def kv_soft_decay_regime(theta: float, gamma: float, T_train: int) -> str:
|
|
@@ -0,0 +1,243 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Numerical tests for TAF Agent formulas — paper §3.3, §5, §7.1.
|
| 2 |
+
|
| 3 |
+
Verifies the corrected implementations match:
|
| 4 |
+
- exact theoretical paper formulas (γ_Padé, D_f closed)
|
| 5 |
+
- numerical ground truth (partition_Z at γ=1, mean_log_d)
|
| 6 |
+
- paper Table §7.1 compression examples
|
| 7 |
+
"""
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import math
|
| 11 |
+
import sys
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
|
| 14 |
+
ROOT = Path(__file__).resolve().parent.parent
|
| 15 |
+
sys.path.insert(0, str(ROOT / "cli"))
|
| 16 |
+
sys.path.insert(0, str(ROOT / "python"))
|
| 17 |
+
|
| 18 |
+
from diagnose_model import ( # type: ignore
|
| 19 |
+
D_f_closed, free_energy_F, partition_Z, mean_log_d,
|
| 20 |
+
entropy_S, heat_capacity_Cv, theta_eff_pade, EULER_GAMMA,
|
| 21 |
+
)
|
| 22 |
+
from taf_browser import ( # type: ignore
|
| 23 |
+
gamma_pade, d_horizon, theta_design, df_window,
|
| 24 |
+
)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# ─────────────────────────────────────────────────────────────────────────
|
| 28 |
+
# γ_Padé (sanity)
|
| 29 |
+
# ─────────────────────────────────────────────────────────────────────────
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def test_gamma_pade_T_zero_gives_one():
|
| 33 |
+
assert abs(gamma_pade(10000, 0) - 1.0) < 1e-12
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def test_gamma_pade_at_T_theta_sqrt2_gives_zero():
|
| 37 |
+
"""T = θ√2 ⇒ γ_Padé = 0 (paper saturation point)."""
|
| 38 |
+
theta = 10000
|
| 39 |
+
T = int(theta * math.sqrt(2))
|
| 40 |
+
g = gamma_pade(theta, T)
|
| 41 |
+
assert abs(g) < 1e-3, f"got {g}"
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def test_gamma_pade_at_T_theta_over_sqrt2_NOT_zero():
|
| 45 |
+
"""T = θ/√2 (= d_alias) gives γ_Padé = 1/3, NOT 0
|
| 46 |
+
(only γ_LINEAR saturates here)."""
|
| 47 |
+
theta = 10000
|
| 48 |
+
T = int(theta / math.sqrt(2))
|
| 49 |
+
g = gamma_pade(theta, T)
|
| 50 |
+
assert abs(g - 1.0/3.0) < 0.01, f"expected ~1/3, got {g}"
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
# ─────────────────────────────────────────────────────────────────────────
|
| 54 |
+
# partition_Z γ=1: H_N + Euler-Mascheroni
|
| 55 |
+
# ─────────────────────────────────────────────────────────────────────────
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def test_partition_Z_at_gamma_1_matches_H_N():
|
| 59 |
+
"""partition_Z(1, N) should approximate H_N = ∑ 1/d to within 1%."""
|
| 60 |
+
for N in (100, 1000, 10000):
|
| 61 |
+
H_N = sum(1.0 / d for d in range(1, N + 1))
|
| 62 |
+
Z_pred = partition_Z(1.0, N)
|
| 63 |
+
rel_err = abs(Z_pred - H_N) / H_N
|
| 64 |
+
assert rel_err < 0.01, f"N={N}: H_N={H_N:.4f}, code={Z_pred:.4f}, err={rel_err:.4f}"
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def test_partition_Z_at_gamma_neq_1_continuous():
|
| 68 |
+
"""Z is continuous across γ=1 boundary (limit-consistent)."""
|
| 69 |
+
Z_below = partition_Z(0.99999, 10000)
|
| 70 |
+
Z_above = partition_Z(1.00001, 10000)
|
| 71 |
+
Z_at = partition_Z(1.0, 10000)
|
| 72 |
+
assert abs(Z_below - Z_at) < 0.05 * Z_at
|
| 73 |
+
assert abs(Z_above - Z_at) < 0.05 * Z_at
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
# ─────────────────────────────────────────────────────────────────────────
|
| 77 |
+
# D_f_closed: discrete cumulative-sum truth (paper Theorem 7.1 is the continuum approximation)
|
| 78 |
+
# ─────────────────────────────────────────────────────────────────────────
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def _df_numerical_truth(gamma: float, f: float, N: int) -> int:
|
| 82 |
+
"""Brute-force compute the smallest D such that ∑_{d=1}^D d^{-γ}/Z ≥ f."""
|
| 83 |
+
weights = [d ** (-gamma) for d in range(1, N + 1)]
|
| 84 |
+
total = sum(weights)
|
| 85 |
+
cum = 0.0
|
| 86 |
+
for d, w in enumerate(weights, start=1):
|
| 87 |
+
cum += w
|
| 88 |
+
if cum / total >= f:
|
| 89 |
+
return d
|
| 90 |
+
return N
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def test_D_f_phase_A_pythia_70m():
|
| 94 |
+
"""Pythia-70m γ=0.748, paper Table §7.1: D_0.90 ≈ 1383."""
|
| 95 |
+
truth = _df_numerical_truth(0.748, 0.90, 2000)
|
| 96 |
+
code = D_f_closed(0.748, 0.90, 2000)
|
| 97 |
+
assert abs(code - truth) <= max(15, 0.02 * truth), \
|
| 98 |
+
f"phase A: code={code}, truth={truth}"
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def test_D_f_phase_A_pythia_2_8b():
|
| 102 |
+
"""pythia-2.8b γ=0.674, paper: D_0.90 ≈ 1476."""
|
| 103 |
+
truth = _df_numerical_truth(0.674, 0.90, 2000)
|
| 104 |
+
code = D_f_closed(0.674, 0.90, 2000)
|
| 105 |
+
assert abs(code - truth) <= max(15, 0.02 * truth)
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
def test_D_f_at_gamma_1_matches_discrete_truth():
|
| 109 |
+
"""At γ=1: discrete D_f from cumulative ∑ 1/d ≥ f·H_N.
|
| 110 |
+
Continuum approximation N^f overestimates by ~6%.
|
| 111 |
+
"""
|
| 112 |
+
truth = _df_numerical_truth(1.0, 0.9, 2000)
|
| 113 |
+
code = D_f_closed(1.0, 0.9, 2000)
|
| 114 |
+
assert code == truth, f"γ=1: code={code}, truth={truth}"
|
| 115 |
+
# Document continuum-approx discrepancy:
|
| 116 |
+
continuum = int(round(2000 ** 0.9))
|
| 117 |
+
assert abs(continuum - truth) > 30, \
|
| 118 |
+
"continuum N^f should differ from discrete truth at γ=1"
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def test_D_f_phase_B_severe_compression():
|
| 122 |
+
"""γ=1.5: discrete-truth implementation → exact match."""
|
| 123 |
+
truth = _df_numerical_truth(1.5, 0.90, 2000)
|
| 124 |
+
code = D_f_closed(1.5, 0.90, 2000)
|
| 125 |
+
assert code == truth, f"phase B: code={code}, truth={truth}"
|
| 126 |
+
assert code < 200, f"phase B should be tiny, got {code}"
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
def test_D_f_llama_3_8b_phase_B():
|
| 130 |
+
"""LLaMA-3-8B γ=1.046 — discrete truth, exact."""
|
| 131 |
+
truth = _df_numerical_truth(1.046, 0.90, 2000)
|
| 132 |
+
code = D_f_closed(1.046, 0.90, 2000)
|
| 133 |
+
assert code == truth
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
def test_D_f_at_boundary_0_99():
|
| 137 |
+
truth = _df_numerical_truth(0.99, 0.90, 2000)
|
| 138 |
+
code = D_f_closed(0.99, 0.90, 2000)
|
| 139 |
+
assert code == truth
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
def test_D_f_at_boundary_1_01():
|
| 143 |
+
truth = _df_numerical_truth(1.01, 0.90, 2000)
|
| 144 |
+
code = D_f_closed(1.01, 0.90, 2000)
|
| 145 |
+
assert code == truth
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
# ─────────────────────────────────────────────────────────────────────────
|
| 149 |
+
# free_energy_F: physics convention F = -log(Z)/γ
|
| 150 |
+
# ─────────────────────────────────────────────────────────────────────────
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
def test_free_energy_F_physics_convention():
|
| 154 |
+
"""F = -T·log(Z) = -log(Z)/γ."""
|
| 155 |
+
for gamma in (0.5, 0.75, 1.0, 1.5):
|
| 156 |
+
Z = partition_Z(gamma, 2000)
|
| 157 |
+
expected = -math.log(Z) / gamma
|
| 158 |
+
code = free_energy_F(gamma, 2000)
|
| 159 |
+
assert abs(code - expected) < 1e-8, \
|
| 160 |
+
f"γ={gamma}: code={code}, expected={expected}"
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
def test_thermodynamic_identity_S_equals_U_minus_F_over_T():
|
| 164 |
+
"""Sanity: S = (U − F)/T = γ·(U − F).
|
| 165 |
+
Equivalently S = γU + log Z when F = -log Z/γ.
|
| 166 |
+
"""
|
| 167 |
+
for gamma in (0.5, 0.75, 1.0, 1.5):
|
| 168 |
+
Z = partition_Z(gamma, 2000)
|
| 169 |
+
U = mean_log_d(gamma, 2000)
|
| 170 |
+
F = free_energy_F(gamma, 2000)
|
| 171 |
+
S_from_eq = gamma * (U - F)
|
| 172 |
+
S_direct = entropy_S(gamma, 2000)
|
| 173 |
+
# In our entropy_S = log Z + γU, and corrected F = -log Z/γ ⇒
|
| 174 |
+
# γ(U − F) = γU + log Z = S. So they MUST match.
|
| 175 |
+
assert abs(S_from_eq - S_direct) < 1e-8, \
|
| 176 |
+
f"γ={gamma}: S_eq={S_from_eq}, S_direct={S_direct}"
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
# ─────────────────────────────────────────────────────────────────────────
|
| 180 |
+
# C_V at Hagedorn — paper §5.2 was wrong, agent's numerical-derivative is OK
|
| 181 |
+
# ─────────────────────────────────────────────────────────────────────────
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
def test_cv_at_hagedorn_matches_corrected_asymptotic():
|
| 185 |
+
"""C_V(γ=1, N) ~ (log N)²/12 + sub-leading corrections.
|
| 186 |
+
Agent's numerical derivative gives the exact discrete value; ratio to
|
| 187 |
+
the leading asymptotic /12 converges slowly (1/log N rate).
|
| 188 |
+
Paper §5.2 said /4 — wrong by factor 3.
|
| 189 |
+
"""
|
| 190 |
+
# Verify agent does NOT match /4 (paper's claim)
|
| 191 |
+
cv_10000 = heat_capacity_Cv(1.0, 10000)
|
| 192 |
+
pred_paper_wrong = math.log(10000) ** 2 / 4.0
|
| 193 |
+
assert cv_10000 / pred_paper_wrong < 0.5, "C_V should NOT match paper's /4"
|
| 194 |
+
|
| 195 |
+
# Verify it DOES converge to /12 from above
|
| 196 |
+
ratios = []
|
| 197 |
+
for N in (1000, 10000, 100000):
|
| 198 |
+
cv = heat_capacity_Cv(1.0, N)
|
| 199 |
+
pred_corrected = math.log(N) ** 2 / 12.0
|
| 200 |
+
ratios.append(cv / pred_corrected)
|
| 201 |
+
# Monotone decreasing toward 1 from above
|
| 202 |
+
assert ratios[0] > ratios[1] > ratios[2] > 1.0
|
| 203 |
+
assert ratios[-1] < 1.20, f"N=10⁵ ratio should approach 1, got {ratios[-1]:.4f}"
|
| 204 |
+
|
| 205 |
+
|
| 206 |
+
# ─────────────────────────────────────────────────────────────────────────
|
| 207 |
+
# Browser df_window — exact in calibrated zone, None outside
|
| 208 |
+
# ─────────────────────────────────────────────────────────────────────────
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
def test_df_window_in_zone():
    """γ=0.748 ∈ [0.65, 0.85]: df_window must return a value, not None.

    The value is compared with the brute-force discrete sum from
    ``_df_numerical_truth`` (not the paper's closed form), within a
    tolerance of max(15, 2% of truth).
    """
    truth = _df_numerical_truth(0.748, 0.90, 2000)
    code = df_window(0.748, 2000, 0.90)
    assert code is not None
    assert abs(code - truth) <= max(15, 0.02 * truth)
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
def test_df_window_out_of_zone_returns_None():
|
| 220 |
+
assert df_window(0.5, 2000) is None # too low
|
| 221 |
+
assert df_window(0.95, 2000) is None # too high
|
| 222 |
+
assert df_window(1.5, 2000) is None # phase B
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
# ─────────────────────────────────────────────────────────────────────────
|
| 226 |
+
# Sanity: theta_design + gamma_pade are inverses
|
| 227 |
+
# ─────────────────────────────────────────────────────────────────────────
|
| 228 |
+
|
| 229 |
+
|
| 230 |
+
def test_theta_design_inverts_gamma_pade():
|
| 231 |
+
"""θ_design(γ, T) should yield θ such that γ_Padé(θ, T) = γ exactly."""
|
| 232 |
+
for gamma_target in (0.3, 0.5, 0.7, 0.85):
|
| 233 |
+
for T in (1000, 2000, 8000):
|
| 234 |
+
theta = theta_design(gamma_target, T)
|
| 235 |
+
recovered = gamma_pade(theta, T)
|
| 236 |
+
assert abs(recovered - gamma_target) < 1e-9
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
def test_theta_eff_pade_definition():
|
| 240 |
+
"""θ_eff_Padé = θ + T/√2 (paper definition)."""
|
| 241 |
+
for theta in (10000, 500000, 1_000_000):
|
| 242 |
+
for T in (1000, 2000):
|
| 243 |
+
assert abs(theta_eff_pade(theta, T) - (theta + T / math.sqrt(2))) < 1e-9
|