swayam1111
/

riemann-vmix

ml-intern

Model card Files Files and versions

xet

Community

swayam1111 commited on 9 days ago

Commit

f7eba79

verified ·

1 Parent(s): cb07656

Upload problem_solvers/alphafold_math.py

Browse files

Files changed (1) hide show

problem_solvers/alphafold_math.py +212 -0

problem_solvers/alphafold_math.py ADDED Viewed

	@@ -0,0 +1,212 @@

+"""
+AlphaEvolve-style Evolutionary Formula Discovery for Zeros
+=============================================================
+Inspired by arXiv:2511.02864 (AlphaEvolve, Terence Tao et al.)
+Key idea: Use an evolutionary coding agent to discover empirical formulas
+for zeta zero statistics. Instead of searching in proof space, we search
+in CODE space — Python expressions that approximate observed data.
+Fitness function: agreement with empirical zero statistics (pair correlation,
+spacing distribution, etc.).
+This is the "AlphaFold for math" approach: end-to-end search for formulas
+that match data, then verify if they generalize.
+"""
+import numpy as np
+from typing import Dict, List, Callable
+import random
+import math
+class FormulaGene:
+    """A candidate formula as a Python expression string."""
+    def __init__(self, expression: str, fitness: float = None):
+        self.expression = expression
+        self.fitness = fitness
+    def __repr__(self):
+        return f"Gene({self.expression}, fitness={self.fitness})"
+class AlphaFoldMathEvolver:
+    """
+    Evolutionary search for formulas matching zero statistics.
+    Target: Find a closed-form formula f(n) that approximates the n-th
+    zero spacing s_n = γ_{n+1} - γ_n.
+    Expression grammar:
+        - Variables: n, log(n), sqrt(n)
+        - Constants: pi, e, numbers 1-10
+        - Operations: +, -, *, /, exp, log, sqrt, sin, cos
+    Fitness: L2 distance between f(n) and actual normalized spacings.
+    """
+    def __init__(self, zeros: List[float]):
+        self.zeros = np.array(zeros)
+        self.spacings = np.diff(self.zeros)
+        self.normalized_spacings = self.spacings / np.mean(self.spacings)
+        self.results = {}
+    def _random_expression(self, depth: int = 0, max_depth: int = 4) -> str:
+        """Generate a random mathematical expression."""
+        if depth >= max_depth or random.random() < 0.3:
+            # Terminal
+            terminals = ['n', 'math.log(n+1)', 'math.sqrt(n)',
+                         'math.pi', 'math.e', '1', '2', '0.5']
+            return random.choice(terminals)
+        # Non-terminal
+        ops = ['+', '-', '*', '/']
+        funcs = ['math.exp', 'math.log', 'math.sqrt', 'math.sin', 'math.cos']
+        if random.random() < 0.5:
+            op = random.choice(ops)
+            left = self._random_expression(depth + 1, max_depth)
+            right = self._random_expression(depth + 1, max_depth)
+            # Protect against division by zero
+            if op == '/':
+                return f"({left}) / ({right} + 0.001)"
+            return f"({left}) {op} ({right})"
+        else:
+            func = random.choice(funcs)
+            arg = self._random_expression(depth + 1, max_depth)
+            # Protect log domain
+            if func == 'math.log':
+                return f"{func}(abs({arg}) + 0.001)"
+            return f"{func}({arg})"
+    def _evaluate_expression(self, expr: str, n_vals: np.ndarray) -> np.ndarray:
+        """Safely evaluate expression for array of n values."""
+        try:
+            # Create safe namespace
+            namespace = {'n': n_vals, 'math': math, 'np': np}
+            result = eval(expr, {"__builtins__": {}}, namespace)
+            result = np.array(result, dtype=float)
+            # Ensure 1D array of correct length
+            if result.ndim == 0:
+                result = np.full(len(n_vals), float(result))
+            # Filter invalid values
+            result = np.where(np.isfinite(result), result, 0)
+            return result
+        except Exception:
+            return np.zeros(len(n_vals))
+    def _fitness(self, expr: str, sample_size: int = 1000) -> float:
+        """Compute fitness: negative MSE between formula and actual spacings."""
+        n_vals = np.arange(1, min(sample_size + 1, len(self.normalized_spacings)))
+        predicted = self._evaluate_expression(expr, n_vals)
+        actual = self.normalized_spacings[:len(n_vals)]
+        if len(predicted) != len(actual):
+            return -1e10
+        # Normalize predicted to match actual scale
+        if np.std(predicted) > 0:
+            predicted = (predicted - np.mean(predicted)) / np.std(predicted)
+            predicted = predicted * np.std(actual) + np.mean(actual)
+        mse = np.mean((predicted - actual) ** 2)
+        # Also reward simplicity (shorter expressions)
+        complexity_penalty = len(expr) * 0.0001
+        return -mse - complexity_penalty
+    def _mutate(self, expr: str) -> str:
+        """Randomly mutate an expression."""
+        mutations = [
+            lambda e: e + f" + {random.choice(['1', '0.1', 'n*0.01'])}",
+            lambda e: f"math.sin({e})" if 'sin' not in e else e,
+            lambda e: f"math.log(abs({e}) + 0.001)" if 'log' not in e else e,
+            lambda e: f"({e}) * {random.choice(['0.9', '1.1', '2'])}",
+            lambda e: self._random_expression(depth=1, max_depth=3),
+        ]
+        return random.choice(mutations)(expr)
+    def _crossover(self, expr1: str, expr2: str) -> str:
+        """Combine two expressions."""
+        if random.random() < 0.5:
+            return f"({expr1}) * 0.5 + ({expr2}) * 0.5"
+        return f"({expr1}) / (abs({expr2}) + 0.001)"
+    def evolve(self, population_size: int = 50, generations: int = 30,
+               sample_size: int = 500) -> Dict:
+        """Run evolutionary search."""
+        print(f"  [AlphaFoldMath] Evolving formulas for {sample_size} spacings...")
+        # Initialize population
+        population = [FormulaGene(self._random_expression()) for _ in range(population_size)]
+        best_fitness_history = []
+        for gen in range(generations):
+            # Evaluate fitness
+            for gene in population:
+                if gene.fitness is None:
+                    gene.fitness = self._fitness(gene.expression, sample_size)
+            # Sort by fitness
+            population.sort(key=lambda g: g.fitness, reverse=True)
+            best_fitness_history.append(population[0].fitness)
+            if gen % 10 == 0:
+                print(f"    Gen {gen}: best fitness = {population[0].fitness:.6f}")
+            # Selection: keep top 20%
+            elite_count = max(1, population_size // 5)
+            new_population = population[:elite_count]
+            # Generate offspring
+            while len(new_population) < population_size:
+                if random.random() < 0.7 and len(population) >= 2:
+                    # Crossover
+                    p1, p2 = random.sample(population[:elite_count*2], 2)
+                    child_expr = self._crossover(p1.expression, p2.expression)
+                else:
+                    # Mutation
+                    parent = random.choice(population[:elite_count*2])
+                    child_expr = self._mutate(parent.expression)
+                new_population.append(FormulaGene(child_expr))
+            population = new_population
+        # Final evaluation
+        for gene in population:
+            if gene.fitness is None:
+                gene.fitness = self._fitness(gene.expression, sample_size)
+        population.sort(key=lambda g: g.fitness, reverse=True)
+        best = population[0]
+        n_vals = np.arange(1, min(sample_size + 1, len(self.normalized_spacings)))
+        predicted = self._evaluate_expression(best.expression, n_vals)
+        self.results = {
+            'strategy': 'alphafold_math_evolutionary_formula',
+            'population_size': population_size,
+            'generations': generations,
+            'best_expression': best.expression,
+            'best_fitness': float(best.fitness),
+            'fitness_history': [float(f) for f in best_fitness_history],
+            'predicted_vs_actual': {
+                'predicted_mean': float(np.mean(predicted)),
+                'actual_mean': float(np.mean(self.normalized_spacings[:sample_size])),
+                'predicted_std': float(np.std(predicted)),
+                'actual_std': float(np.std(self.normalized_spacings[:sample_size])),
+            },
+            'interpretation': "Negative fitness = -MSE. Higher = better agreement.",
+        }
+        return self.results
+    def summary(self) -> str:
+        r = self.results
+        s = f"AlphaFold-Math Formula Evolver\n{'='*50}\n"
+        s += f"Best formula: {r['best_expression']}\n"
+        s += f"Best fitness: {r['best_fitness']:.6f}\n"
+        pv = r['predicted_vs_actual']
+        s += f"Predicted mean={pv['predicted_mean']:.4f} vs actual={pv['actual_mean']:.4f}\n"
+        s += f"Predicted std={pv['predicted_std']:.4f} vs actual={pv['actual_std']:.4f}\n"
+        s += f"Note: This is an empirical formula discovered by evolution, not proven.\n"
+        return s