ml-intern
swayam1111 committed on
Commit
42bc813
·
verified ·
1 Parent(s): 7729174

Upload problem_solvers/cross_module_analysis.py with huggingface_hub

Browse files
problem_solvers/cross_module_analysis.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ CROSS-MODULE ANALYSES enabled by v_mix unification
3
+ ====================================================
4
+ 1. Spectral features (v2) → prime prediction accuracy (v3)
5
+ 2. Transfer learning: operator fitness predicts prime gap size
6
+ 3. Conjecture validation: use all 100k zeros to test generated conjectures
7
+ """
8
+
9
+ import numpy as np
10
+ from typing import Dict, List
11
+ from sklearn.ensemble import GradientBoostingRegressor
12
+ from sklearn.model_selection import train_test_split
13
+ from sklearn.metrics import mean_absolute_error
14
+
15
+
16
class CrossModuleAnalyzer:
    """
    Cross-module pipeline: do spectral features from zero distribution
    improve ML prime prediction accuracy beyond raw zero oscillations?

    The analyzer holds an ascending array of zero ordinates (``self.zeros``)
    and accumulates the outcome of each analysis in ``self.results``, keyed
    by analysis name (``'transfer_learning'``, ``'conjecture_validation'``).
    """

    # Number of consecutive zeros used for the local spacing statistics.
    _LOCAL_WINDOW = 100

    def __init__(self, zeros: List[float]):
        """Store the zeros as a sorted float array and reset the results.

        Args:
            zeros: ordinates of the zeros. They are sorted ascending here
                because ``np.searchsorted`` and the "first n zeros" slices
                used below are only meaningful on ascending data.
        """
        self.zeros = np.sort(np.asarray(zeros, dtype=float))
        self.results = {}

    def _sieve_primes(self, limit: int) -> np.ndarray:
        """Return all primes ``<= limit`` (sieve of Eratosthenes).

        Returns an empty integer array when ``limit < 2``.
        """
        if limit < 2:
            return np.array([], dtype=np.intp)
        sieve = np.ones(limit + 1, dtype=bool)
        sieve[:2] = False
        for i in range(2, int(limit ** 0.5) + 1):
            if sieve[i]:
                # Multiples below i*i were already struck by smaller primes.
                sieve[i * i::i] = False
        return np.where(sieve)[0]

    @staticmethod
    def _summary_stats(values: np.ndarray) -> List[float]:
        """Return ``[mean, std, min, max]`` of *values*; zeros when empty."""
        if values.size == 0:
            return [0.0, 0.0, 0.0, 0.0]
        return [
            float(np.mean(values)),
            float(np.std(values)),
            float(np.min(values)),
            float(np.max(values)),
        ]

    def _local_spacings(self, x: float) -> np.ndarray:
        """Spacings of up to ``_LOCAL_WINDOW`` zeros centred on γ ≈ x."""
        window = self._LOCAL_WINDOW
        centre = int(np.searchsorted(self.zeros, x))
        # Centre the window on the insertion point, then clamp it so it
        # never starts before index 0 nor runs past the end of the array.
        last_start = max(self.zeros.size - window, 0)
        start = min(max(centre - window // 2, 0), last_start)
        return np.diff(self.zeros[start:start + window])

    def _compute_spectral_features(self, x: float, n_zeros: int = 500) -> np.ndarray:
        """Compute spectral features (spacings, oscillation terms) at position x.

        Args:
            x: position at which to evaluate the features. The logarithmic
                features use ``np.log(x)`` directly, so they are only finite
                for ``x > 0``.
            n_zeros: how many of the lowest zeros feed the oscillation terms.

        Returns:
            A length-13 float array:
            ``[0:4]``  mean/std/min/max of the per-zero oscillation terms,
            ``[4:8]``  mean/std/min/max of the local zero spacings,
            ``[8:11]`` ``x % 2``, ``x % 3``, ``x % 6``,
            ``[11]``   ``log(x)``, ``[12]`` ``1 / log(x + 1)``.
            A statistics group is all zeros when it has no data (no zeros,
            ``n_zeros == 0``, or fewer than two zeros in the local window).
        """
        gammas = self.zeros[:n_zeros]
        log_x = np.log(max(x, 2))

        # Per-zero term -2 * Re(x^rho / rho) with rho = 1/2 + i*gamma,
        # evaluated for all selected zeros in one vectorised expression.
        phase = gammas * log_x
        cos_term = np.cos(phase) * 0.5
        sin_term = np.sin(phase) * gammas
        contributions = -2 * np.sqrt(x) * (cos_term + sin_term) / (0.25 + gammas * gammas)

        features = [
            *self._summary_stats(contributions),
            *self._summary_stats(self._local_spacings(x)),
            x % 2,
            x % 3,
            x % 6,
            np.log(x),
            1.0 / np.log(x + 1),
        ]
        return np.array(features)

    def analyze_transfer_learning(self, train_limit: int = 50000, max_samples: int = 2000) -> Dict:
        """
        Test: do spectral features improve prime gap prediction?

        Fits a gradient-boosting regressor that predicts the gap following a
        prime from the spectral features at that prime, and compares its
        mean absolute error on a held-out 20% split with a constant
        predictor (the mean training gap).

        Args:
            train_limit: primes up to this bound are generated.
            max_samples: cap on the number of (prime, gap) samples. Samples
                are the *first* primes, not a random draw, so a large
                ``train_limit`` only matters up to this cap.

        Returns:
            ``self.results`` with the ``'transfer_learning'`` entry filled in.

        Raises:
            ValueError: if fewer than two samples are available, which is
                too few for a train/test split.
        """
        primes = self._sieve_primes(train_limit)
        gaps = np.diff(primes)

        # The original pipeline leaves out the final gap; keep that so
        # results stay comparable with earlier runs.
        n_samples = max(min(len(gaps) - 1, max_samples), 0)
        if n_samples < 2:
            raise ValueError(
                f"train_limit={train_limit} yields only {n_samples} sample(s); "
                "at least 2 are required for a train/test split"
            )

        # Build features at each prime
        X = np.array([
            self._compute_spectral_features(float(p), n_zeros=200)
            for p in primes[:n_samples]
        ])
        y = np.asarray(gaps[:n_samples])

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Model with spectral features
        model = GradientBoostingRegressor(n_estimators=100, max_depth=4, random_state=42)
        model.fit(X_train, y_train)
        mae_spectral = mean_absolute_error(y_test, model.predict(X_test))

        # Baseline: always predict the mean training gap
        baseline_mae = float(np.mean(np.abs(np.mean(y_train) - y_test)))

        # A zero baseline error (all test gaps equal the mean) would make
        # the relative improvement undefined; report 0.0 in that case.
        if baseline_mae > 0:
            improvement = float((baseline_mae - mae_spectral) / baseline_mae)
        else:
            improvement = 0.0

        importance = model.feature_importances_.tolist()

        self.results['transfer_learning'] = {
            'train_limit': train_limit,
            'n_samples': len(y),
            'mae_spectral': float(mae_spectral),
            'baseline_mae': baseline_mae,
            'improvement': improvement,
            'feature_importance': importance,
            'best_feature_idx': int(np.argmax(importance)),
        }
        return self.results

    def analyze_conjecture_validation(
        self,
        window_sizes: tuple = (1000, 5000, 10000),
        prime_limit: int = 100000,
    ) -> Dict:
        """
        Validate a conjecture: 'larger spectral rigidity → smaller prime gaps'.

        For each window size ``w`` the first ``w`` zeros give a rigidity
        score ``1 / (var(normalised spacings) + 0.1)`` (a simplified stand-in
        for the Δ₃ statistic), paired with the mean of the first ``w`` prime
        gaps below ``prime_limit`` (a very rough correspondence). The
        conjecture counts as supported only when the Pearson correlation
        between the two series is below -0.5, i.e. rigidity and gap size
        move in opposite directions.

        Args:
            window_sizes: numbers of leading zeros to analyse; sizes larger
                than the number of available zeros are skipped.
            prime_limit: upper bound for the primes whose gaps are averaged.

        Returns:
            ``self.results`` with the ``'conjecture_validation'`` entry
            filled in. ``'correlation'`` is 0.0 when fewer than three
            windows are usable (two points always correlate at ±1) or when
            the correlation is undefined.
        """
        all_gaps = np.diff(self._sieve_primes(prime_limit))
        rigidity_scores = []
        avg_gaps = []

        for w in window_sizes:
            # Need at least one spacing and at least one prime gap.
            if w > len(self.zeros) or w < 2 or all_gaps.size == 0:
                continue
            local_spacings = np.diff(self.zeros[:w])
            mean_spacing = np.mean(local_spacings)
            if not mean_spacing > 0:
                # Degenerate window (all zeros identical): cannot normalise.
                continue
            normalized = local_spacings / mean_spacing
            # Simple rigidity: variance of spacings (lower = more rigid)
            rigidity_scores.append(float(1.0 / (np.var(normalized) + 0.1)))
            # Corresponding prime region (very rough correspondence)
            avg_gaps.append(float(np.mean(all_gaps[:min(w, len(all_gaps))])))

        corr = 0.0
        if len(rigidity_scores) >= 3:
            # A constant series makes the coefficient undefined (NaN);
            # silence the floating-point warning and fall back to 0.0.
            with np.errstate(invalid='ignore', divide='ignore'):
                value = float(np.corrcoef(rigidity_scores, avg_gaps)[0, 1])
            if np.isfinite(value):
                corr = value

        self.results['conjecture_validation'] = {
            'conjecture': 'Higher spectral rigidity → smaller prime gaps',
            'correlation': corr,
            # Support requires a *negative* relationship; a strong positive
            # correlation contradicts the conjecture.
            'supported': corr < -0.5,
            'rigidity_scores': rigidity_scores,
            'avg_gaps': avg_gaps,
        }
        return self.results

    def run_all(self) -> Dict:
        """Run both analyses with their defaults and return ``self.results``."""
        print("\n[CROSS-MODULE] Transfer learning: spectral → prime gaps")
        self.analyze_transfer_learning(train_limit=50000)
        print("\n[CROSS-MODULE] Conjecture validation: rigidity ↔ gaps")
        self.analyze_conjecture_validation()
        return self.results

    def summary(self) -> str:
        """Render the stored results as a short multi-line text report.

        Sections for analyses that have not been run are omitted.
        """
        r = self.results
        lines = ["Cross-Module Analysis", '=' * 50]
        if 'transfer_learning' in r:
            tl = r['transfer_learning']
            lines.append(
                f"Transfer learning MAE: {tl['mae_spectral']:.2f} (baseline: {tl['baseline_mae']:.2f})"
            )
            lines.append(f"Improvement: {tl['improvement']:.1%}")
            lines.append(f"Best feature index: {tl['best_feature_idx']}")
        if 'conjecture_validation' in r:
            cv = r['conjecture_validation']
            verdict = 'SUPPORTED' if cv['supported'] else 'NOT SUPPORTED'
            lines.append(f"Conjecture: {cv['conjecture']}")
            lines.append(f"Correlation: {cv['correlation']:.4f} → {verdict}")
        return "\n".join(lines) + "\n"