| """ |
| AlphaEvolve-style Evolutionary Formula Discovery for Zeros |
| ============================================================= |
| Inspired by arXiv:2511.02864 (AlphaEvolve, Terence Tao et al.) |
| |
| Key idea: Use an evolutionary coding agent to discover empirical formulas |
| for zeta zero statistics. Instead of searching in proof space, we search |
| in CODE space — Python expressions that approximate observed data. |
| |
| Fitness function: agreement with empirical zero statistics (pair correlation, |
| spacing distribution, etc.). |
| |
| This is the "AlphaFold for math" approach: end-to-end search for formulas |
| that match data, then verify if they generalize. |
| """ |
|
|
| import numpy as np |
| from typing import Dict, List, Callable |
| import random |
| import math |
|
|
|
|
| class FormulaGene: |
| """A candidate formula as a Python expression string.""" |
| def __init__(self, expression: str, fitness: float = None): |
| self.expression = expression |
| self.fitness = fitness |
|
|
| def __repr__(self): |
| return f"Gene({self.expression}, fitness={self.fitness})" |
|
|
|
|
| class AlphaFoldMathEvolver: |
| """ |
| Evolutionary search for formulas matching zero statistics. |
| |
| Target: Find a closed-form formula f(n) that approximates the n-th |
| zero spacing s_n = γ_{n+1} - γ_n. |
| |
| Expression grammar: |
| - Variables: n, log(n), sqrt(n) |
| - Constants: pi, e, numbers 1-10 |
| - Operations: +, -, *, /, exp, log, sqrt, sin, cos |
| |
| Fitness: L2 distance between f(n) and actual normalized spacings. |
| """ |
|
|
| def __init__(self, zeros: List[float]): |
| self.zeros = np.array(zeros) |
| self.spacings = np.diff(self.zeros) |
| self.normalized_spacings = self.spacings / np.mean(self.spacings) |
| self.results = {} |
|
|
| def _random_expression(self, depth: int = 0, max_depth: int = 4) -> str: |
| """Generate a random mathematical expression.""" |
| if depth >= max_depth or random.random() < 0.3: |
| |
| terminals = ['n', 'math.log(n+1)', 'math.sqrt(n)', |
| 'math.pi', 'math.e', '1', '2', '0.5'] |
| return random.choice(terminals) |
|
|
| |
| ops = ['+', '-', '*', '/'] |
| funcs = ['math.exp', 'math.log', 'math.sqrt', 'math.sin', 'math.cos'] |
|
|
| if random.random() < 0.5: |
| op = random.choice(ops) |
| left = self._random_expression(depth + 1, max_depth) |
| right = self._random_expression(depth + 1, max_depth) |
| |
| if op == '/': |
| return f"({left}) / ({right} + 0.001)" |
| return f"({left}) {op} ({right})" |
| else: |
| func = random.choice(funcs) |
| arg = self._random_expression(depth + 1, max_depth) |
| |
| if func == 'math.log': |
| return f"{func}(abs({arg}) + 0.001)" |
| return f"{func}({arg})" |
|
|
| def _evaluate_expression(self, expr: str, n_vals: np.ndarray) -> np.ndarray: |
| """Safely evaluate expression for array of n values.""" |
| try: |
| |
| namespace = {'n': n_vals, 'math': math, 'np': np} |
| result = eval(expr, {"__builtins__": {}}, namespace) |
| result = np.array(result, dtype=float) |
| |
| if result.ndim == 0: |
| result = np.full(len(n_vals), float(result)) |
| |
| result = np.where(np.isfinite(result), result, 0) |
| return result |
| except Exception: |
| return np.zeros(len(n_vals)) |
|
|
| def _fitness(self, expr: str, sample_size: int = 1000) -> float: |
| """Compute fitness: negative MSE between formula and actual spacings.""" |
| n_vals = np.arange(1, min(sample_size + 1, len(self.normalized_spacings))) |
| predicted = self._evaluate_expression(expr, n_vals) |
| actual = self.normalized_spacings[:len(n_vals)] |
|
|
| if len(predicted) != len(actual): |
| return -1e10 |
|
|
| |
| if np.std(predicted) > 0: |
| predicted = (predicted - np.mean(predicted)) / np.std(predicted) |
| predicted = predicted * np.std(actual) + np.mean(actual) |
|
|
| mse = np.mean((predicted - actual) ** 2) |
| |
| complexity_penalty = len(expr) * 0.0001 |
| return -mse - complexity_penalty |
|
|
| def _mutate(self, expr: str) -> str: |
| """Randomly mutate an expression.""" |
| mutations = [ |
| lambda e: e + f" + {random.choice(['1', '0.1', 'n*0.01'])}", |
| lambda e: f"math.sin({e})" if 'sin' not in e else e, |
| lambda e: f"math.log(abs({e}) + 0.001)" if 'log' not in e else e, |
| lambda e: f"({e}) * {random.choice(['0.9', '1.1', '2'])}", |
| lambda e: self._random_expression(depth=1, max_depth=3), |
| ] |
| return random.choice(mutations)(expr) |
|
|
| def _crossover(self, expr1: str, expr2: str) -> str: |
| """Combine two expressions.""" |
| if random.random() < 0.5: |
| return f"({expr1}) * 0.5 + ({expr2}) * 0.5" |
| return f"({expr1}) / (abs({expr2}) + 0.001)" |
|
|
| def evolve(self, population_size: int = 50, generations: int = 30, |
| sample_size: int = 500) -> Dict: |
| """Run evolutionary search.""" |
| print(f" [AlphaFoldMath] Evolving formulas for {sample_size} spacings...") |
|
|
| |
| population = [FormulaGene(self._random_expression()) for _ in range(population_size)] |
|
|
| best_fitness_history = [] |
|
|
| for gen in range(generations): |
| |
| for gene in population: |
| if gene.fitness is None: |
| gene.fitness = self._fitness(gene.expression, sample_size) |
|
|
| |
| population.sort(key=lambda g: g.fitness, reverse=True) |
| best_fitness_history.append(population[0].fitness) |
|
|
| if gen % 10 == 0: |
| print(f" Gen {gen}: best fitness = {population[0].fitness:.6f}") |
|
|
| |
| elite_count = max(1, population_size // 5) |
| new_population = population[:elite_count] |
|
|
| |
| while len(new_population) < population_size: |
| if random.random() < 0.7 and len(population) >= 2: |
| |
| p1, p2 = random.sample(population[:elite_count*2], 2) |
| child_expr = self._crossover(p1.expression, p2.expression) |
| else: |
| |
| parent = random.choice(population[:elite_count*2]) |
| child_expr = self._mutate(parent.expression) |
|
|
| new_population.append(FormulaGene(child_expr)) |
|
|
| population = new_population |
|
|
| |
| for gene in population: |
| if gene.fitness is None: |
| gene.fitness = self._fitness(gene.expression, sample_size) |
| population.sort(key=lambda g: g.fitness, reverse=True) |
|
|
| best = population[0] |
| n_vals = np.arange(1, min(sample_size + 1, len(self.normalized_spacings))) |
| predicted = self._evaluate_expression(best.expression, n_vals) |
|
|
| self.results = { |
| 'strategy': 'alphafold_math_evolutionary_formula', |
| 'population_size': population_size, |
| 'generations': generations, |
| 'best_expression': best.expression, |
| 'best_fitness': float(best.fitness), |
| 'fitness_history': [float(f) for f in best_fitness_history], |
| 'predicted_vs_actual': { |
| 'predicted_mean': float(np.mean(predicted)), |
| 'actual_mean': float(np.mean(self.normalized_spacings[:sample_size])), |
| 'predicted_std': float(np.std(predicted)), |
| 'actual_std': float(np.std(self.normalized_spacings[:sample_size])), |
| }, |
| 'interpretation': "Negative fitness = -MSE. Higher = better agreement.", |
| } |
| return self.results |
|
|
| def summary(self) -> str: |
| r = self.results |
| s = f"AlphaFold-Math Formula Evolver\n{'='*50}\n" |
| s += f"Best formula: {r['best_expression']}\n" |
| s += f"Best fitness: {r['best_fitness']:.6f}\n" |
| pv = r['predicted_vs_actual'] |
| s += f"Predicted mean={pv['predicted_mean']:.4f} vs actual={pv['actual_mean']:.4f}\n" |
| s += f"Predicted std={pv['predicted_std']:.4f} vs actual={pv['actual_std']:.4f}\n" |
| s += f"Note: This is an empirical formula discovered by evolution, not proven.\n" |
| return s |
|
|