ml-intern
swayam1111 commited on
Commit
b104fc1
·
verified ·
1 Parent(s): 1c60ea2

Upload problem_solvers/cross_entropy_rh.py

Browse files
Files changed (1) hide show
  1. problem_solvers/cross_entropy_rh.py +193 -0
problem_solvers/cross_entropy_rh.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Cross-Entropy Rare Event Simulation for RH
3
+ =============================================
4
+ Inspired by arXiv:2409.19790 (Li & Li, 2024)
5
+
6
+ Key idea: The Riemann Hypothesis is about the location of ALL zeros.
7
+ We can frame this as a rare event simulation problem:
8
+ - Sample zero-spacing configurations
9
+ - Use cross-entropy method to adaptively bias sampling toward configurations
10
+ that are consistent with RH (all spacings showing GUE statistics)
11
+ - Estimate probability of "RH-violating" configurations (anomalous spacings)
12
+
13
+ The cross-entropy method:
14
+ 1. Start with reference distribution (GUE Wigner surmise)
15
+ 2. Sample configurations
16
+ 3. Score each: how "RH-consistent" is it?
17
+ 4. Update reference distribution to increase probability of high-scoring configs
18
+ 5. Repeat until convergence
19
+
20
+ This gives us:
21
+ - An estimate of how "stable" the RH-consistent statistics are
22
+ - Identification of the most anomalous configurations
23
+ - A quantitative measure of how far zeros are from violating RH
24
+ """
25
+
26
+ import numpy as np
27
+ from typing import Dict, List
28
+ from scipy import stats
29
+
30
+
31
+ class CrossEntropyRHSampler:
32
+ """
33
+ Cross-entropy rare event simulation for zero spacing configurations.
34
+
35
+ Reference distribution: We model spacing distribution as a mixture
36
+ of GUE Wigner surmise and a perturbation. The CE method updates
37
+ the mixture weights to focus on extreme (anomalous) configurations.
38
+ """
39
+
40
+ def __init__(self, zeros: List[float]):
41
+ self.zeros = np.array(zeros)
42
+ self.spacings = np.diff(self.zeros)
43
+ self.normalized = self.spacings / np.mean(self.spacings)
44
+ self.results = {}
45
+
46
+ def _wigner_pdf(self, s: np.ndarray) -> np.ndarray:
47
+ """Wigner surmise PDF."""
48
+ return (np.pi / 2) * s * np.exp(-np.pi * s**2 / 4)
49
+
50
+ def _wigner_cdf(self, s: np.ndarray) -> np.ndarray:
51
+ """Wigner surmise CDF."""
52
+ return 1.0 - np.exp(-np.pi * s**2 / 4)
53
+
54
+ def _score_configuration(self, config: np.ndarray) -> float:
55
+ """
56
+ Score how RH-consistent a spacing configuration is.
57
+ Higher score = more consistent with GUE = more "RH-safe".
58
+
59
+ We use KS distance to Wigner surmise as the metric.
60
+ Lower KS = higher score.
61
+ """
62
+ if len(config) < 10:
63
+ return 0.0
64
+ ks_stat, _ = stats.kstest(config, lambda x: self._wigner_cdf(np.array(x)))
65
+ return max(0, 1.0 - ks_stat) # Higher = better match
66
+
67
+ def _sample_from_mixture(self, n: int, wigner_weight: float = 0.9,
68
+ anomalous_weight: float = 0.1) -> np.ndarray:
69
+ """Sample spacings from mixture of Wigner + anomalous distribution."""
70
+ # Sample from Wigner using inverse CDF
71
+ u = np.random.uniform(0, 1, int(n * wigner_weight))
72
+ # Inverse CDF of Wigner: F^{-1}(u) = sqrt(-4*log(1-u)/pi)
73
+ wigner_samples = np.sqrt(-4 * np.log(1 - u + 1e-15) / np.pi)
74
+
75
+ # Anomalous: sample from broader distribution (Gamma)
76
+ n_anom = int(n * anomalous_weight)
77
+ anom_samples = np.random.gamma(2, 1.0, n_anom)
78
+
79
+ samples = np.concatenate([wigner_samples, anom_samples])
80
+ np.random.shuffle(samples)
81
+ return samples[:n]
82
+
83
+ def run_simulation(self, n_iterations: int = 10, n_samples: int = 1000,
84
+ window_size: int = 500, elite_fraction: float = 0.1) -> Dict:
85
+ """
86
+ Run cross-entropy rare event simulation.
87
+
88
+ At each iteration:
89
+ 1. Sample configurations from current mixture
90
+ 2. Score them
91
+ 3. Update mixture weights based on elite samples
92
+ 4. Track convergence
93
+ """
94
+ print(f" [CrossEntropy] Running {n_iterations} CE iterations...")
95
+
96
+ # Start with actual zero spacings as reference
97
+ reference_config = self.normalized[:window_size]
98
+
99
+ iteration_scores = []
100
+ wigner_weights = []
101
+ best_configs = []
102
+
103
+ wigner_weight = 0.9 # Initial: mostly GUE-like
104
+
105
+ for iteration in range(n_iterations):
106
+ scores = []
107
+ configs = []
108
+
109
+ for _ in range(n_samples):
110
+ # Sample configuration
111
+ if iteration == 0:
112
+ # First iteration: use actual spacings with noise
113
+ config = reference_config + np.random.normal(0, 0.05, len(reference_config))
114
+ else:
115
+ config = self._sample_from_mixture(window_size, wigner_weight)
116
+
117
+ # Normalize
118
+ config = config / np.mean(config)
119
+ score = self._score_configuration(config)
120
+ scores.append(score)
121
+ configs.append(config)
122
+
123
+ scores = np.array(scores)
124
+
125
+ # Select elite (top configurations)
126
+ elite_threshold = np.percentile(scores, (1 - elite_fraction) * 100)
127
+ elite_indices = np.where(scores >= elite_threshold)[0]
128
+ elite_configs = [configs[i] for i in elite_indices]
129
+
130
+ # Update reference: fit new Wigner weight to elite configs
131
+ # Compute average KS distance of elite configs
132
+ elite_ks = [self._score_configuration(c) for c in elite_configs]
133
+ avg_elite_score = np.mean(elite_ks)
134
+
135
+ # Adapt mixture weight: if elite configs are very GUE-like,
136
+ # increase Wigner weight; if diverse, decrease it
137
+ if avg_elite_score > 0.95:
138
+ wigner_weight = min(0.99, wigner_weight + 0.02)
139
+ else:
140
+ wigner_weight = max(0.5, wigner_weight - 0.02)
141
+
142
+ iteration_scores.append(float(avg_elite_score))
143
+ wigner_weights.append(float(wigner_weight))
144
+
145
+ # Save best
146
+ best_idx = np.argmax(scores)
147
+ best_configs.append({
148
+ 'score': float(scores[best_idx]),
149
+ 'mean': float(np.mean(configs[best_idx])),
150
+ 'std': float(np.std(configs[best_idx])),
151
+ 'min': float(np.min(configs[best_idx])),
152
+ 'max': float(np.max(configs[best_idx])),
153
+ })
154
+
155
+ if iteration % 3 == 0:
156
+ print(f" Iter {iteration}: avg elite score={avg_elite_score:.4f}, "
157
+ f"wigner_weight={wigner_weight:.3f}")
158
+
159
+ # Compute probability of "extreme" configurations
160
+ # (configs with score < 0.5 = significantly non-GUE)
161
+ final_scores = []
162
+ for _ in range(n_samples):
163
+ config = self._sample_from_mixture(window_size, wigner_weight)
164
+ config = config / np.mean(config)
165
+ final_scores.append(self._score_configuration(config))
166
+
167
+ extreme_prob = np.mean(np.array(final_scores) < 0.5)
168
+
169
+ self.results = {
170
+ 'strategy': 'cross_entropy_rare_event_simulation',
171
+ 'n_iterations': n_iterations,
172
+ 'n_samples': n_samples,
173
+ 'window_size': window_size,
174
+ 'iteration_scores': iteration_scores,
175
+ 'wigner_weights': wigner_weights,
176
+ 'best_configs': best_configs,
177
+ 'final_wigner_weight': float(wigner_weight),
178
+ 'extreme_non_gue_probability': float(extreme_prob),
179
+ 'interpretation': "Low extreme_prob = GUE statistics are stable. High = potential anomalies.",
180
+ }
181
+ return self.results
182
+
183
+ def summary(self) -> str:
184
+ r = self.results
185
+ s = f"Cross-Entropy Rare Event Simulation\n{'='*50}\n"
186
+ s += f"Iterations: {r['n_iterations']}, Samples/iter: {r['n_samples']}\n"
187
+ s += f"Final Wigner weight: {r['final_wigner_weight']:.3f}\n"
188
+ s += f"Elite score progression: {[round(x, 3) for x in r['iteration_scores']]}\n"
189
+ s += f"P(extreme non-GUE config): {r['extreme_non_gue_probability']:.4f}\n"
190
+ s += f"Interpretation: {r['interpretation']}\n"
191
+ if r['extreme_non_gue_probability'] < 0.01:
192
+ s += "→ GUE statistics are HIGHLY stable — consistent with RH.\n"
193
+ return s