"""Numerical tests for TAF Agent formulas — paper §3.3, §5, §7.1. Verifies the corrected implementations match: - exact theoretical paper formulas (γ_Padé, D_f closed) - numerical ground truth (partition_Z at γ=1, mean_log_d) - paper Table §7.1 compression examples """ from __future__ import annotations import math import sys from pathlib import Path ROOT = Path(__file__).resolve().parent.parent sys.path.insert(0, str(ROOT / "cli")) sys.path.insert(0, str(ROOT / "python")) from diagnose_model import ( # type: ignore D_f_closed, free_energy_F, partition_Z, mean_log_d, entropy_S, heat_capacity_Cv, theta_eff_pade, EULER_GAMMA, ) from taf_browser import ( # type: ignore gamma_pade, d_horizon, theta_design, df_window, gamma_decompose, gamma_decompose_v2, ) # ───────────────────────────────────────────────────────────────────────── # γ_Padé (sanity) # ───────────────────────────────────────────────────────────────────────── def test_gamma_pade_T_zero_gives_one(): assert abs(gamma_pade(10000, 0) - 1.0) < 1e-12 def test_gamma_pade_at_T_theta_sqrt2_gives_zero(): """T = θ√2 ⇒ γ_Padé = 0 (paper saturation point).""" theta = 10000 T = int(theta * math.sqrt(2)) g = gamma_pade(theta, T) assert abs(g) < 1e-3, f"got {g}" def test_gamma_pade_at_T_theta_over_sqrt2_NOT_zero(): """T = θ/√2 (= d_alias) gives γ_Padé = 1/3, NOT 0 (only γ_LINEAR saturates here).""" theta = 10000 T = int(theta / math.sqrt(2)) g = gamma_pade(theta, T) assert abs(g - 1.0/3.0) < 0.01, f"expected ~1/3, got {g}" # ───────────────────────────────────────────────────────────────────────── # partition_Z γ=1: H_N + Euler-Mascheroni # ───────────────────────────────────────────────────────────────────────── def test_partition_Z_at_gamma_1_matches_H_N(): """partition_Z(1, N) should approximate H_N = ∑ 1/d to within 1%.""" for N in (100, 1000, 10000): H_N = sum(1.0 / d for d in range(1, N + 1)) Z_pred = partition_Z(1.0, N) rel_err = abs(Z_pred - H_N) / H_N assert rel_err < 0.01, f"N={N}: H_N={H_N:.4f}, code={Z_pred:.4f}, err={rel_err:.4f}" def test_partition_Z_at_gamma_neq_1_continuous(): """Z is continuous across γ=1 boundary (limit-consistent).""" Z_below = partition_Z(0.99999, 10000) Z_above = partition_Z(1.00001, 10000) Z_at = partition_Z(1.0, 10000) assert abs(Z_below - Z_at) < 0.05 * Z_at assert abs(Z_above - Z_at) < 0.05 * Z_at # ───────────────────────────────────────────────────────────────────────── # D_f_closed: exact paper Theorem 7.1 # ───────────────────────────────────────────────────────────────────────── def _df_numerical_truth(gamma: float, f: float, N: int) -> int: """Brute-force compute the smallest D such that ∑_{d=1}^D d^{-γ}/Z ≥ f.""" weights = [d ** (-gamma) for d in range(1, N + 1)] total = sum(weights) cum = 0.0 for d, w in enumerate(weights, start=1): cum += w if cum / total >= f: return d return N def test_D_f_phase_A_pythia_70m(): """Pythia-70m γ=0.748, paper Table §7.1: D_0.90 ≈ 1383.""" truth = _df_numerical_truth(0.748, 0.90, 2000) code = D_f_closed(0.748, 0.90, 2000) assert abs(code - truth) <= max(15, 0.02 * truth), \ f"phase A: code={code}, truth={truth}" def test_D_f_phase_A_pythia_2_8b(): """pythia-2.8b γ=0.674, paper: D_0.90 ≈ 1476.""" truth = _df_numerical_truth(0.674, 0.90, 2000) code = D_f_closed(0.674, 0.90, 2000) assert abs(code - truth) <= max(15, 0.02 * truth) def test_D_f_at_gamma_1_matches_discrete_truth(): """At γ=1: discrete D_f from cumulative ∑ 1/d ≥ f·H_N. Continuum approximation N^f overestimates by ~6%. """ truth = _df_numerical_truth(1.0, 0.9, 2000) code = D_f_closed(1.0, 0.9, 2000) assert code == truth, f"γ=1: code={code}, truth={truth}" # Document continuum-approx discrepancy: continuum = int(round(2000 ** 0.9)) assert abs(continuum - truth) > 30, \ "continuum N^f should differ from discrete truth at γ=1" def test_D_f_phase_B_severe_compression(): """γ=1.5: discrete-truth implementation → exact match.""" truth = _df_numerical_truth(1.5, 0.90, 2000) code = D_f_closed(1.5, 0.90, 2000) assert code == truth, f"phase B: code={code}, truth={truth}" assert code < 200, f"phase B should be tiny, got {code}" def test_D_f_llama_3_8b_phase_B(): """LLaMA-3-8B γ=1.046 — discrete truth, exact.""" truth = _df_numerical_truth(1.046, 0.90, 2000) code = D_f_closed(1.046, 0.90, 2000) assert code == truth def test_D_f_at_boundary_0_99(): truth = _df_numerical_truth(0.99, 0.90, 2000) code = D_f_closed(0.99, 0.90, 2000) assert code == truth def test_D_f_at_boundary_1_01(): truth = _df_numerical_truth(1.01, 0.90, 2000) code = D_f_closed(1.01, 0.90, 2000) assert code == truth # ───────────────────────────────────────────────────────────────────────── # free_energy_F: physics convention F = -log(Z)/γ # ───────────────────────────────────────────────────────────────────────── def test_free_energy_F_physics_convention(): """F = -T·log(Z) = -log(Z)/γ.""" for gamma in (0.5, 0.75, 1.0, 1.5): Z = partition_Z(gamma, 2000) expected = -math.log(Z) / gamma code = free_energy_F(gamma, 2000) assert abs(code - expected) < 1e-8, \ f"γ={gamma}: code={code}, expected={expected}" def test_thermodynamic_identity_S_equals_U_minus_F_over_T(): """Sanity: S = (U − F)/T = γ·(U − F). Equivalently S = γU + log Z when F = -log Z/γ. """ for gamma in (0.5, 0.75, 1.0, 1.5): Z = partition_Z(gamma, 2000) U = mean_log_d(gamma, 2000) F = free_energy_F(gamma, 2000) S_from_eq = gamma * (U - F) S_direct = entropy_S(gamma, 2000) # In our entropy_S = log Z + γU, and corrected F = -log Z/γ ⇒ # γ(U − F) = γU + log Z = S. So they MUST match. assert abs(S_from_eq - S_direct) < 1e-8, \ f"γ={gamma}: S_eq={S_from_eq}, S_direct={S_direct}" # ───────────────────────────────────────────────────────────────────────── # C_V at Hagedorn — paper §5.2 was wrong, agent's numerical-derivative is OK # ───────────────────────────────────────────────────────────────────────── def test_cv_at_hagedorn_matches_corrected_asymptotic(): """C_V(γ=1, N) ~ (log N)²/12 + sub-leading corrections. Agent's numerical derivative gives the exact discrete value; ratio to the leading asymptotic /12 converges slowly (1/log N rate). Paper §5.2 said /4 — wrong by factor 3. """ # Verify agent does NOT match /4 (paper's claim) cv_10000 = heat_capacity_Cv(1.0, 10000) pred_paper_wrong = math.log(10000) ** 2 / 4.0 assert cv_10000 / pred_paper_wrong < 0.5, "C_V should NOT match paper's /4" # Verify it DOES converge to /12 from above ratios = [] for N in (1000, 10000, 100000): cv = heat_capacity_Cv(1.0, N) pred_corrected = math.log(N) ** 2 / 12.0 ratios.append(cv / pred_corrected) # Monotone decreasing toward 1 from above assert ratios[0] > ratios[1] > ratios[2] > 1.0 assert ratios[-1] < 1.20, f"N=10⁵ ratio should approach 1, got {ratios[-1]:.4f}" # ───────────────────────────────────────────────────────────────────────── # Browser df_window — exact in calibrated zone, None outside # ───────────────────────────────────────────────────────────────────────── def test_df_window_in_zone(): """γ=0.748 ∈ [0.65, 0.85]: should match exact paper formula.""" truth = _df_numerical_truth(0.748, 0.90, 2000) code = df_window(0.748, 2000, 0.90) assert code is not None assert abs(code - truth) <= max(15, 0.02 * truth) def test_df_window_out_of_zone_returns_None(): assert df_window(0.5, 2000) is None # too low assert df_window(0.95, 2000) is None # too high assert df_window(1.5, 2000) is None # phase B # ───────────────────────────────────────────────────────────────────────── # Sanity: theta_design + gamma_pade are inverses # ───────────────────────────────────────────────────────────────────────── def test_theta_design_inverts_gamma_pade(): """θ_design(γ, T) should yield θ such that γ_Padé(θ, T) = γ exactly.""" for gamma_target in (0.3, 0.5, 0.7, 0.85): for T in (1000, 2000, 8000): theta = theta_design(gamma_target, T) recovered = gamma_pade(theta, T) assert abs(recovered - gamma_target) < 1e-9 def test_theta_eff_pade_definition(): """θ_eff_Padé = θ + T/√2 (paper definition).""" for theta in (10000, 500000, 1_000_000): for T in (1000, 2000): assert abs(theta_eff_pade(theta, T) - (theta + T / math.sqrt(2))) < 1e-9 # ───────────────────────────────────────────────────────────────────────── # gamma_decompose: audit-driven calibration changes # ───────────────────────────────────────────────────────────────────────── def test_decompose_SWA_disabled(): """δ_SWA was originally fit on n=1 — must NOT apply correction; status flagged.""" result = gamma_decompose(0.75, has_SWA=True) assert result["delta_SWA"] == 0.0 assert "n1_disabled" in result["delta_SWA_status"] def test_decompose_GQA_still_active(): """δ_GQA replicates in panel re-audit (+0.115 vs +0.11 hardcoded).""" on = gamma_decompose(0.75, has_GQA=True) off = gamma_decompose(0.75, has_GQA=False) assert abs(on["delta_GQA"] - 0.11) < 1e-9 assert off["delta_GQA"] == 0.0 def test_decompose_v2_warnings_present(): """v2 must emit calibration_warning.""" r = gamma_decompose_v2(0.75, n_params_M=500, has_SWA=True, is_instruct=True) assert "calibration_warning" in r assert r["delta_SWA"] == 0.0 # disabled assert "exploratory" in r["delta_SWA_status"] or "n1" in r["delta_SWA_status"]