ml-intern
riemann-vmix / problem_solvers /cross_module_analysis.py
swayam1111's picture
Upload problem_solvers/cross_module_analysis.py with huggingface_hub
42bc813 verified
"""
CROSS-MODULE ANALYSES enabled by v_mix unification
====================================================
1. Spectral features (v2) → prime prediction accuracy (v3)
2. Transfer learning: operator fitness predicts prime gap size
3. Conjecture validation: use all 100k zeros to test generated conjectures
"""
import numpy as np
from typing import Dict, List
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
class CrossModuleAnalyzer:
    """
    Cross-module pipeline: do spectral features from zero distribution
    improve ML prime prediction accuracy beyond raw zero oscillations?

    The analyzer is built from a list of zero ordinates (``zeros``) and
    accumulates the outcome of each analysis in ``self.results``, keyed by
    analysis name (``'transfer_learning'``, ``'conjecture_validation'``).
    """

    # Names of the entries of the vector returned by
    # ``_compute_spectral_features``, in order.
    FEATURE_NAMES = (
        'osc_mean', 'osc_std', 'osc_min', 'osc_max',
        'spacing_mean', 'spacing_std', 'spacing_min', 'spacing_max',
        'x_mod_2', 'x_mod_3', 'x_mod_6',
        'log_x', 'inv_log_x_plus_1',
    )

    # Number of consecutive zeros used for the local spacing statistics.
    _LOCAL_WINDOW = 100

    def __init__(self, zeros: List[float]):
        """Store the zero ordinates and start with an empty result set.

        Args:
            zeros: Zero ordinates (the gamma values). They are copied and
                sorted ascending, because ``np.searchsorted`` in
                ``_compute_spectral_features`` requires a sorted array.
        """
        self.zeros = np.sort(np.asarray(zeros, dtype=float))
        self.results = {}

    def _sieve_primes(self, limit: int) -> np.ndarray:
        """Return all primes ``<= limit`` (sieve of Eratosthenes).

        Args:
            limit: Inclusive upper bound.

        Returns:
            Ascending integer array of primes; empty when ``limit < 2``.
        """
        if limit < 2:
            # No primes below 2; also avoids a negative array size.
            return np.array([], dtype=np.intp)
        sieve = np.ones(limit + 1, dtype=bool)
        sieve[:2] = False
        for i in range(2, int(limit ** 0.5) + 1):
            if sieve[i]:
                # Multiples below i*i were already struck by smaller primes.
                sieve[i * i::i] = False
        return np.flatnonzero(sieve)

    @staticmethod
    def _summary_stats(values: np.ndarray) -> List[float]:
        """Return ``[mean, std, min, max]`` of *values*, or zeros if empty."""
        if values.size == 0:
            return [0.0, 0.0, 0.0, 0.0]
        return [
            float(np.mean(values)),
            float(np.std(values)),
            float(np.min(values)),
            float(np.max(values)),
        ]

    def _compute_spectral_features(self, x: float, n_zeros: int = 500) -> np.ndarray:
        """Compute spectral features (spacings, oscillation terms) at position x.

        Args:
            x: Position at which to evaluate the features. Callers in this
                class pass primes (``x >= 2``); ``log(x)`` and
                ``1 / log(x + 1)`` are not guarded for ``x <= 0``.
            n_zeros: How many of the lowest zeros feed the oscillation terms.

        Returns:
            Float array of 13 features, ordered as in ``FEATURE_NAMES``:
            mean/std/min/max of the per-zero oscillation terms, mean/std/min/max
            of the local zero spacings, ``x`` mod 2, 3 and 6, ``log(x)`` and
            ``1 / log(x + 1)``. Statistics over an empty set (no zeros, or a
            window with fewer than two zeros) are reported as 0.0.
        """
        gammas = self.zeros[:n_zeros]
        log_x = np.log(max(x, 2))

        # Zero oscillation contributions (v3-style), one term per zero:
        #   -2 * sqrt(x) * (cos(g*log x)/2 + g*sin(g*log x)) / (1/4 + g^2)
        phase = gammas * log_x
        contributions = (
            -2 * np.sqrt(x) * (0.5 * np.cos(phase) + gammas * np.sin(phase))
            / (0.25 + gammas * gammas)
        )

        # Spectral features from the local zero distribution: a window of
        # _LOCAL_WINDOW zeros centred on the insertion point of x, clamped
        # so that it never runs off either end of the array.
        window = self._LOCAL_WINDOW
        centre = int(np.searchsorted(self.zeros, x))
        start = max(0, min(len(self.zeros) - window, centre - window // 2))
        local_spacings = np.diff(self.zeros[start:start + window])

        features = [
            *self._summary_stats(contributions),
            *self._summary_stats(local_spacings),
            x % 2,
            x % 3,
            x % 6,
            np.log(x),
            1.0 / np.log(x + 1),
        ]
        return np.array(features, dtype=float)

    def analyze_transfer_learning(self, train_limit: int = 50000,
                                  max_samples: int = 2000) -> Dict:
        """
        Test: do spectral features improve prime gap prediction?

        Fits a gradient-boosting regressor that predicts the gap following a
        prime from the spectral features at that prime, and compares its test
        MAE with a constant predictor (the mean training gap).

        Args:
            train_limit: Primes up to this bound are generated.
            max_samples: Cap on the number of (prime, gap) pairs used. The
                *first* ``max_samples`` primes are taken (a prefix, not a
                random sample), which keeps the run fast.

        Returns:
            ``self.results`` (the full result dict), with the entry
            ``'transfer_learning'`` added or replaced.

        Raises:
            ValueError: If fewer than two samples are available, since a
                train/test split is then impossible.
        """
        primes = self._sieve_primes(train_limit)
        gaps = np.diff(primes)

        n_samples = min(len(gaps), max_samples)
        if n_samples < 2:
            raise ValueError(
                f"Need at least 2 prime gaps to train; got {n_samples} "
                f"(train_limit={train_limit}, max_samples={max_samples})"
            )

        # Build features at each prime; the target is the gap to the next one.
        X = np.array([
            self._compute_spectral_features(float(p), n_zeros=200)
            for p in primes[:n_samples]
        ])
        y = gaps[:n_samples]

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        # Model with spectral features
        model = GradientBoostingRegressor(n_estimators=100, max_depth=4, random_state=42)
        model.fit(X_train, y_train)
        mae_spectral = mean_absolute_error(y_test, model.predict(X_test))

        # Baseline: always predict the mean training gap
        baseline_mae = float(np.mean(np.abs(np.mean(y_train) - y_test)))

        # A zero baseline error leaves no room for relative improvement.
        if baseline_mae > 0:
            improvement = (baseline_mae - float(mae_spectral)) / baseline_mae
        else:
            improvement = 0.0

        # Feature importance
        importance = model.feature_importances_.tolist()
        best_idx = int(np.argmax(importance))

        self.results['transfer_learning'] = {
            'train_limit': train_limit,
            'n_samples': len(y),
            'mae_spectral': float(mae_spectral),
            'baseline_mae': baseline_mae,
            'improvement': float(improvement),
            'feature_importance': importance,
            'best_feature_idx': best_idx,
            'best_feature_name': self.FEATURE_NAMES[best_idx],
        }
        return self.results

    def analyze_conjecture_validation(self) -> Dict:
        """
        Validate a conjecture: 'larger spectral rigidity → smaller prime gaps'.

        For each window size (the first 1000, 5000 and 10000 zeros) a simple
        rigidity score ``1 / (var(normalized spacings) + 0.1)`` is computed
        and paired with the mean of the first ``w`` prime gaps below 100000
        (a very rough correspondence). The conjecture predicts a *negative*
        correlation between the two, so it counts as supported only when the
        Pearson correlation is below -0.5.

        Window sizes larger than the number of available zeros are skipped.
        With at most three data points the correlation is only indicative.

        Returns:
            ``self.results`` (the full result dict), with the entry
            ``'conjecture_validation'`` added or replaced.
        """
        # Simplified: sample a few windows
        window_sizes = [1000, 5000, 10000]
        rigidity_scores = []
        avg_gaps = []

        all_gaps = np.diff(self._sieve_primes(100000))

        for w in window_sizes:
            if w > len(self.zeros):
                continue
            local_spacings = np.diff(self.zeros[:w])
            normalized = local_spacings / np.mean(local_spacings)
            # Simple rigidity: inverse spacing variance (lower variance =
            # more rigid); the 0.1 keeps the score bounded.
            rigidity_scores.append(float(1.0 / (np.var(normalized) + 0.1)))
            # Corresponding prime region (very rough correspondence)
            avg_gaps.append(float(np.mean(all_gaps[:min(w, len(all_gaps))])))

        corr = 0.0
        if len(rigidity_scores) >= 2:
            with np.errstate(invalid='ignore', divide='ignore'):
                corr = float(np.corrcoef(rigidity_scores, avg_gaps)[0, 1])
            if not np.isfinite(corr):
                # Constant inputs give an undefined correlation.
                corr = 0.0

        self.results['conjecture_validation'] = {
            'conjecture': 'Higher spectral rigidity → smaller prime gaps',
            'correlation': corr,
            # Higher rigidity with smaller gaps means negative correlation.
            'supported': corr < -0.5,
            'rigidity_scores': rigidity_scores,
            'avg_gaps': avg_gaps,
        }
        return self.results

    def run_all(self) -> Dict:
        """Run both analyses with default settings and return ``self.results``."""
        print("\n[CROSS-MODULE] Transfer learning: spectral → prime gaps")
        self.analyze_transfer_learning(train_limit=50000)
        print("\n[CROSS-MODULE] Conjecture validation: rigidity ↔ gaps")
        self.analyze_conjecture_validation()
        return self.results

    def summary(self) -> str:
        """Return a human-readable report of whichever analyses have been run."""
        r = self.results
        parts = [f"Cross-Module Analysis\n{'='*50}\n"]
        if 'transfer_learning' in r:
            tl = r['transfer_learning']
            parts.append(
                f"Transfer learning MAE: {tl['mae_spectral']:.2f} "
                f"(baseline: {tl['baseline_mae']:.2f})\n"
            )
            parts.append(f"Improvement: {tl['improvement']:.1%}\n")
            parts.append(f"Best feature index: {tl['best_feature_idx']}\n")
        if 'conjecture_validation' in r:
            cv = r['conjecture_validation']
            verdict = 'SUPPORTED' if cv['supported'] else 'NOT SUPPORTED'
            parts.append(f"Conjecture: {cv['conjecture']}\n")
            parts.append(f"Correlation: {cv['correlation']:.4f} → {verdict}\n")
        return "".join(parts)