ml-intern
swayam1111 committed on
Commit
f7eba79
·
verified ·
1 Parent(s): cb07656

Upload problem_solvers/alphafold_math.py

Browse files
Files changed (1) hide show
  1. problem_solvers/alphafold_math.py +212 -0
problem_solvers/alphafold_math.py ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ AlphaEvolve-style Evolutionary Formula Discovery for Zeros
3
+ =============================================================
4
+ Inspired by arXiv:2511.02864 (AlphaEvolve, Terence Tao et al.)
5
+
6
+ Key idea: Use an evolutionary coding agent to discover empirical formulas
7
+ for zeta zero statistics. Instead of searching in proof space, we search
8
+ in CODE space — Python expressions that approximate observed data.
9
+
10
+ Fitness function: agreement with empirical zero statistics (pair correlation,
11
+ spacing distribution, etc.).
12
+
13
+ This is the "AlphaFold for math" approach: end-to-end search for formulas
14
+ that match data, then verify if they generalize.
15
+ """
16
+
17
+ import numpy as np
18
+ from typing import Dict, List, Callable
19
+ import random
20
+ import math
21
+
22
+
23
class FormulaGene:
    """One population member: an expression string together with its score.

    ``expression`` holds Python source text for a candidate formula.
    ``fitness`` stays ``None`` until a score is assigned to the gene.
    """

    def __init__(self, expression: str, fitness: float = None):
        self.expression, self.fitness = expression, fitness

    def __repr__(self):
        # Same text as the f-string form: both go through format(value, "").
        return "Gene({}, fitness={})".format(self.expression, self.fitness)
31
+
32
+
33
class AlphaFoldMathEvolver:
    """
    Evolutionary search for formulas matching zero statistics.

    Target: find a closed-form formula f(n) that approximates the n-th
    zero spacing s_n = γ_{n+1} - γ_n, normalized by the mean spacing.

    Expression grammar (see ``_random_expression``):
    - Terminals: n, log(n+1), sqrt(n), pi, e, 1, 2, 0.5
    - Binary operations: +, -, *, /
    - Functions: exp, log, sqrt, sin, cos

    Fitness: negative mean squared error between f(n) -- affinely rescaled
    to the mean/std of the data -- and the actual normalized spacings, minus
    a small penalty proportional to the expression length.
    """

    # Sentinel fitness for candidates that cannot be scored at all.
    _WORST_FITNESS = -1e10

    class _VectorMath:
        """Stand-in for ``math`` inside evaluated expressions.

        Generated formulas are written as ``math.log(...)`` etc. so that they
        stay valid plain Python, but they are evaluated with ``n`` bound to a
        whole numpy array, which the stdlib ``math`` functions reject.  The
        functions used by the grammar are therefore mapped to their
        broadcasting numpy counterparts.
        """

        pi = math.pi
        e = math.e
        exp = staticmethod(np.exp)
        log = staticmethod(np.log)
        sqrt = staticmethod(np.sqrt)
        sin = staticmethod(np.sin)
        cos = staticmethod(np.cos)

        def __getattr__(self, name):
            # Names outside the grammar fall back to the scalar stdlib version.
            return getattr(math, name)

    def __init__(self, zeros: List[float]):
        """Store the zeros and derive raw and mean-normalized spacings.

        Args:
            zeros: ordinates of the zeros; consecutive differences are taken
                with ``np.diff``.
        """
        self.zeros = np.array(zeros)
        self.spacings = np.diff(self.zeros)
        if self.spacings.size:
            self.normalized_spacings = self.spacings / np.mean(self.spacings)
        else:
            # Fewer than two zeros: no spacings, and no mean to divide by.
            self.normalized_spacings = self.spacings.astype(float)
        self.results = {}

    def _random_expression(self, depth: int = 0, max_depth: int = 4) -> str:
        """Generate a random expression string from the grammar.

        Args:
            depth: current recursion depth.
            max_depth: depth at which a terminal is always produced.

        Returns:
            Python source text using ``n``, ``math.*`` and ``abs``.
        """
        if depth >= max_depth or random.random() < 0.3:
            # Terminal
            terminals = ['n', 'math.log(n+1)', 'math.sqrt(n)',
                         'math.pi', 'math.e', '1', '2', '0.5']
            return random.choice(terminals)

        # Non-terminal
        ops = ['+', '-', '*', '/']
        funcs = ['math.exp', 'math.log', 'math.sqrt', 'math.sin', 'math.cos']

        if random.random() < 0.5:
            op = random.choice(ops)
            left = self._random_expression(depth + 1, max_depth)
            right = self._random_expression(depth + 1, max_depth)
            # Offset the denominator to make an exact zero less likely.
            if op == '/':
                return f"({left}) / ({right} + 0.001)"
            return f"({left}) {op} ({right})"

        func = random.choice(funcs)
        arg = self._random_expression(depth + 1, max_depth)
        # Keep the argument of log strictly positive.
        if func == 'math.log':
            return f"{func}(abs({arg}) + 0.001)"
        return f"{func}({arg})"

    def _evaluate_expression(self, expr: str, n_vals: np.ndarray) -> np.ndarray:
        """Evaluate an expression for an array of n values.

        Args:
            expr: Python expression text; may reference ``n``, ``math``,
                ``np`` and ``abs``.
            n_vals: 1-D sequence of n values.

        Returns:
            Float array with the same shape as ``n_vals``.  Non-finite entries
            are replaced by 0, and an expression that fails to evaluate yields
            all zeros.
        """
        # Float n avoids silent int64 wrap-around in products like n*n*n*...
        n = np.asarray(n_vals, dtype=float)
        # NOTE(security): an empty __builtins__ is NOT a sandbox.  Only
        # expressions produced by this class should ever be passed in.
        scope = {
            "__builtins__": {},
            "n": n,
            "math": self._VectorMath(),
            "np": np,
            "abs": abs,  # emitted by the grammar, mutation and crossover
        }
        try:
            # Domain errors and overflow become nan/inf here and are zeroed
            # below, so the numpy floating-point warnings are just noise.
            with np.errstate(all="ignore"):
                values = np.array(eval(expr, scope), dtype=float)
            # Constant expressions evaluate to a scalar: expand to n's shape.
            values = np.broadcast_to(values, n.shape)
            return np.where(np.isfinite(values), values, 0.0)
        except Exception:
            # Deliberate best-effort: a malformed or pathological candidate
            # (syntax, name, recursion, shape ...) must not abort the search.
            return np.zeros(n.shape)

    def _sample_n_values(self, sample_size: int) -> np.ndarray:
        """Return n = 1..k, where k = min(sample_size, number of spacings)."""
        count = max(0, min(sample_size, len(self.normalized_spacings)))
        return np.arange(1, count + 1)

    def _fitness(self, expr: str, sample_size: int = 1000) -> float:
        """Compute fitness: negative MSE between formula and actual spacings.

        The prediction is first rescaled to the mean and standard deviation of
        the data (unless it is constant), so only the shape of the formula is
        scored.  A penalty of 0.0001 per character of ``expr`` is subtracted.

        Returns:
            The fitness (higher is better), or -1e10 when there is no data or
            the error is not a finite number.
        """
        n_vals = self._sample_n_values(sample_size)
        if n_vals.size == 0:
            return self._WORST_FITNESS

        predicted = self._evaluate_expression(expr, n_vals)
        actual = self.normalized_spacings[:len(n_vals)]
        if len(predicted) != len(actual):
            return self._WORST_FITNESS

        with np.errstate(all="ignore"):
            # Normalize predicted to match actual scale
            spread = np.std(predicted)
            if spread > 0:
                predicted = (predicted - np.mean(predicted)) / spread
                predicted = predicted * np.std(actual) + np.mean(actual)
            mse = np.mean((predicted - actual) ** 2)

        # A nan/inf score would make the population ordering undefined.
        if not np.isfinite(mse):
            return self._WORST_FITNESS

        # Also reward simplicity (shorter expressions)
        complexity_penalty = len(expr) * 0.0001
        return float(-mse - complexity_penalty)

    def _mutate(self, expr: str) -> str:
        """Return a randomly mutated variant of ``expr``.

        One of five mutations is chosen uniformly: add a small term, wrap in
        sin, wrap in a protected log, rescale, or replace the expression with
        a fresh random one.  The sin/log wraps are skipped (expression
        returned unchanged) if the text already contains 'sin'/'log'.
        """
        mutations = [
            lambda e: e + f" + {random.choice(['1', '0.1', 'n*0.01'])}",
            lambda e: f"math.sin({e})" if 'sin' not in e else e,
            lambda e: f"math.log(abs({e}) + 0.001)" if 'log' not in e else e,
            lambda e: f"({e}) * {random.choice(['0.9', '1.1', '2'])}",
            lambda e: self._random_expression(depth=1, max_depth=3),
        ]
        return random.choice(mutations)(expr)

    def _crossover(self, expr1: str, expr2: str) -> str:
        """Combine two expressions: their average, or a protected quotient."""
        if random.random() < 0.5:
            return f"({expr1}) * 0.5 + ({expr2}) * 0.5"
        return f"({expr1}) / (abs({expr2}) + 0.001)"

    def _rank(self, population: List["FormulaGene"], sample_size: int) -> None:
        """Score every unscored gene, then sort in place, best first."""
        for gene in population:
            if gene.fitness is None:
                gene.fitness = self._fitness(gene.expression, sample_size)
        population.sort(key=lambda g: g.fitness, reverse=True)

    def evolve(self, population_size: int = 50, generations: int = 30,
               sample_size: int = 500) -> Dict:
        """Run evolutionary search.

        Each generation keeps the top 20% unchanged and refills the population
        with children of the top 40%: crossover with probability 0.7,
        mutation otherwise.

        Args:
            population_size: number of candidate formulas per generation.
            generations: number of generations to run.
            sample_size: maximum number of spacings used for scoring.

        Returns:
            The results dict, which is also stored in ``self.results``.
            ``predicted_vs_actual`` compares the raw (not rescaled) output of
            the best formula with the data over the same sample.
        """
        print(f" [AlphaFoldMath] Evolving formulas for {sample_size} spacings...")

        # Initialize population
        population = [FormulaGene(self._random_expression())
                      for _ in range(population_size)]

        best_fitness_history = []

        for gen in range(generations):
            self._rank(population, sample_size)
            best_fitness_history.append(population[0].fitness)

            if gen % 10 == 0:
                print(f" Gen {gen}: best fitness = {population[0].fitness:.6f}")

            # Selection: keep top 20%
            elite_count = max(1, population_size // 5)
            parents = population[:elite_count * 2]
            new_population = population[:elite_count]

            # Generate offspring
            while len(new_population) < population_size:
                if random.random() < 0.7 and len(parents) >= 2:
                    # Crossover
                    p1, p2 = random.sample(parents, 2)
                    child_expr = self._crossover(p1.expression, p2.expression)
                else:
                    # Mutation
                    parent = random.choice(parents)
                    child_expr = self._mutate(parent.expression)

                new_population.append(FormulaGene(child_expr))

            population = new_population

        # Final evaluation (children of the last generation are unscored)
        self._rank(population, sample_size)

        best = population[0]
        n_vals = self._sample_n_values(sample_size)
        predicted = self._evaluate_expression(best.expression, n_vals)
        # Compare against exactly the spacings the fitness was computed on.
        actual = self.normalized_spacings[:len(n_vals)]

        self.results = {
            'strategy': 'alphafold_math_evolutionary_formula',
            'population_size': population_size,
            'generations': generations,
            'best_expression': best.expression,
            'best_fitness': float(best.fitness),
            'fitness_history': [float(f) for f in best_fitness_history],
            'predicted_vs_actual': {
                'predicted_mean': float(np.mean(predicted)),
                'actual_mean': float(np.mean(actual)),
                'predicted_std': float(np.std(predicted)),
                'actual_std': float(np.std(actual)),
            },
            'interpretation': "Negative fitness = -MSE. Higher = better agreement.",
        }
        return self.results

    def summary(self) -> str:
        """Return a human-readable report of the last ``evolve()`` run."""
        header = f"AlphaFold-Math Formula Evolver\n{'='*50}\n"
        r = self.results
        if not r:
            # Nothing to report yet; avoid a KeyError on the empty dict.
            return header + "No results yet - call evolve() first.\n"

        pv = r['predicted_vs_actual']
        return "".join([
            header,
            f"Best formula: {r['best_expression']}\n",
            f"Best fitness: {r['best_fitness']:.6f}\n",
            f"Predicted mean={pv['predicted_mean']:.4f} vs actual={pv['actual_mean']:.4f}\n",
            f"Predicted std={pv['predicted_std']:.4f} vs actual={pv['actual_std']:.4f}\n",
            "Note: This is an empirical formula discovered by evolution, not proven.\n",
        ])