ml-intern
riemann-vmix / problem_solvers /cross_entropy_rh.py
swayam1111's picture
Upload problem_solvers/cross_entropy_rh.py
b104fc1 verified
"""
Cross-Entropy Rare Event Simulation for RH
=============================================
Inspired by arXiv:2409.19790 (Li & Li, 2024)
Key idea: The Riemann Hypothesis is about the location of ALL zeros.
We can frame this as a rare event simulation problem:
- Sample zero-spacing configurations
- Use cross-entropy method to adaptively bias sampling toward configurations
that are consistent with RH (all spacings showing GUE statistics)
- Estimate probability of "RH-violating" configurations (anomalous spacings)
The cross-entropy method:
1. Start with reference distribution (GUE Wigner surmise)
2. Sample configurations
3. Score each: how "RH-consistent" is it?
4. Update reference distribution to increase probability of high-scoring configs
5. Repeat until convergence
This gives us:
- An estimate of how "stable" the RH-consistent statistics are
- Identification of the most anomalous configurations
- A quantitative measure of how far zeros are from violating RH
"""
import numpy as np
from typing import Dict, List
from scipy import stats
class CrossEntropyRHSampler:
"""
Cross-entropy rare event simulation for zero spacing configurations.
Reference distribution: We model spacing distribution as a mixture
of GUE Wigner surmise and a perturbation. The CE method updates
the mixture weights to focus on extreme (anomalous) configurations.
"""
def __init__(self, zeros: List[float]):
self.zeros = np.array(zeros)
self.spacings = np.diff(self.zeros)
self.normalized = self.spacings / np.mean(self.spacings)
self.results = {}
def _wigner_pdf(self, s: np.ndarray) -> np.ndarray:
"""Wigner surmise PDF."""
return (np.pi / 2) * s * np.exp(-np.pi * s**2 / 4)
def _wigner_cdf(self, s: np.ndarray) -> np.ndarray:
"""Wigner surmise CDF."""
return 1.0 - np.exp(-np.pi * s**2 / 4)
def _score_configuration(self, config: np.ndarray) -> float:
"""
Score how RH-consistent a spacing configuration is.
Higher score = more consistent with GUE = more "RH-safe".
We use KS distance to Wigner surmise as the metric.
Lower KS = higher score.
"""
if len(config) < 10:
return 0.0
ks_stat, _ = stats.kstest(config, lambda x: self._wigner_cdf(np.array(x)))
return max(0, 1.0 - ks_stat) # Higher = better match
def _sample_from_mixture(self, n: int, wigner_weight: float = 0.9,
anomalous_weight: float = 0.1) -> np.ndarray:
"""Sample spacings from mixture of Wigner + anomalous distribution."""
# Sample from Wigner using inverse CDF
u = np.random.uniform(0, 1, int(n * wigner_weight))
# Inverse CDF of Wigner: F^{-1}(u) = sqrt(-4*log(1-u)/pi)
wigner_samples = np.sqrt(-4 * np.log(1 - u + 1e-15) / np.pi)
# Anomalous: sample from broader distribution (Gamma)
n_anom = int(n * anomalous_weight)
anom_samples = np.random.gamma(2, 1.0, n_anom)
samples = np.concatenate([wigner_samples, anom_samples])
np.random.shuffle(samples)
return samples[:n]
def run_simulation(self, n_iterations: int = 10, n_samples: int = 1000,
window_size: int = 500, elite_fraction: float = 0.1) -> Dict:
"""
Run cross-entropy rare event simulation.
At each iteration:
1. Sample configurations from current mixture
2. Score them
3. Update mixture weights based on elite samples
4. Track convergence
"""
print(f" [CrossEntropy] Running {n_iterations} CE iterations...")
# Start with actual zero spacings as reference
reference_config = self.normalized[:window_size]
iteration_scores = []
wigner_weights = []
best_configs = []
wigner_weight = 0.9 # Initial: mostly GUE-like
for iteration in range(n_iterations):
scores = []
configs = []
for _ in range(n_samples):
# Sample configuration
if iteration == 0:
# First iteration: use actual spacings with noise
config = reference_config + np.random.normal(0, 0.05, len(reference_config))
else:
config = self._sample_from_mixture(window_size, wigner_weight)
# Normalize
config = config / np.mean(config)
score = self._score_configuration(config)
scores.append(score)
configs.append(config)
scores = np.array(scores)
# Select elite (top configurations)
elite_threshold = np.percentile(scores, (1 - elite_fraction) * 100)
elite_indices = np.where(scores >= elite_threshold)[0]
elite_configs = [configs[i] for i in elite_indices]
# Update reference: fit new Wigner weight to elite configs
# Compute average KS distance of elite configs
elite_ks = [self._score_configuration(c) for c in elite_configs]
avg_elite_score = np.mean(elite_ks)
# Adapt mixture weight: if elite configs are very GUE-like,
# increase Wigner weight; if diverse, decrease it
if avg_elite_score > 0.95:
wigner_weight = min(0.99, wigner_weight + 0.02)
else:
wigner_weight = max(0.5, wigner_weight - 0.02)
iteration_scores.append(float(avg_elite_score))
wigner_weights.append(float(wigner_weight))
# Save best
best_idx = np.argmax(scores)
best_configs.append({
'score': float(scores[best_idx]),
'mean': float(np.mean(configs[best_idx])),
'std': float(np.std(configs[best_idx])),
'min': float(np.min(configs[best_idx])),
'max': float(np.max(configs[best_idx])),
})
if iteration % 3 == 0:
print(f" Iter {iteration}: avg elite score={avg_elite_score:.4f}, "
f"wigner_weight={wigner_weight:.3f}")
# Compute probability of "extreme" configurations
# (configs with score < 0.5 = significantly non-GUE)
final_scores = []
for _ in range(n_samples):
config = self._sample_from_mixture(window_size, wigner_weight)
config = config / np.mean(config)
final_scores.append(self._score_configuration(config))
extreme_prob = np.mean(np.array(final_scores) < 0.5)
self.results = {
'strategy': 'cross_entropy_rare_event_simulation',
'n_iterations': n_iterations,
'n_samples': n_samples,
'window_size': window_size,
'iteration_scores': iteration_scores,
'wigner_weights': wigner_weights,
'best_configs': best_configs,
'final_wigner_weight': float(wigner_weight),
'extreme_non_gue_probability': float(extreme_prob),
'interpretation': "Low extreme_prob = GUE statistics are stable. High = potential anomalies.",
}
return self.results
def summary(self) -> str:
r = self.results
s = f"Cross-Entropy Rare Event Simulation\n{'='*50}\n"
s += f"Iterations: {r['n_iterations']}, Samples/iter: {r['n_samples']}\n"
s += f"Final Wigner weight: {r['final_wigner_weight']:.3f}\n"
s += f"Elite score progression: {[round(x, 3) for x in r['iteration_scores']]}\n"
s += f"P(extreme non-GUE config): {r['extreme_non_gue_probability']:.4f}\n"
s += f"Interpretation: {r['interpretation']}\n"
if r['extreme_non_gue_probability'] < 0.01:
s += "→ GUE statistics are HIGHLY stable — consistent with RH.\n"
return s