ml-intern
File size: 4,661 Bytes
914569c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
"""
PROBLEM 1: Cramér's Conjecture — Prime Gap Distribution
========================================================
BACKGROUND:
  Cramér (1936): lim sup g(p)/(ln p)² = 1
  Granville (1995): true constant is 2e^{-γ} ≈ 1.1229
  NOBODY HAS DISCRIMINATED BETWEEN THEM EMPIRICALLY.

HOW TO SOLVE:
  1. Compute prime gaps for primes up to 10^7
  2. Compute Cramér ratio g(p)/(ln p)² for ALL prime gaps
  3. Build histogram of ratio distribution
  4. Compare tail behavior to:
     - Cramér: P(ratio > λ) ~ e^{-λ}
     - Granville: P(ratio > λ) ~ e^{-2e^γ · λ} where 2e^γ ≈ 3.562
  5. Statistical test: which model fits the tail better?
"""

import numpy as np
from typing import Dict, List
from scipy import stats


class CramerGapAnalyzer:
    def __init__(self):
        self.results = {}
        self.primes = []
        self.gaps = []
        self.ratios = []

    def sieve_primes(self, limit: int) -> np.ndarray:
        sieve = np.ones(limit + 1, dtype=bool)
        sieve[:2] = False
        for i in range(2, int(limit ** 0.5) + 1):
            if sieve[i]:
                sieve[i * i::i] = False
        return np.where(sieve)[0]

    def analyze(self, limit: int = 5_000_000) -> Dict:
        print(f"  [Cramér] Sieving primes up to {limit:,}...")
        self.primes = self.sieve_primes(limit)
        self.gaps = np.diff(self.primes)
        log_primes = np.log(self.primes[1:])
        self.ratios = self.gaps / (log_primes ** 2)

        # Tail analysis
        thresholds = np.linspace(0.1, 2.0, 50)
        empirical_survival = []
        cramer_survival = []
        granville_survival = []

        two_e_gamma = 2.0 * np.exp(0.5772156649015329)  # ≈ 3.562

        for lam in thresholds:
            emp_surv = np.mean(self.ratios > lam)
            empirical_survival.append(emp_surv)
            cramer_survival.append(np.exp(-lam))
            granville_survival.append(np.exp(-two_e_gamma * lam))

        # Kolmogorov-Smirnov-like test on tail (λ > 0.5)
        tail_mask = thresholds > 0.5
        if np.sum(tail_mask) > 5:
            emp_tail = np.array(empirical_survival)[tail_mask]
            cra_tail = np.array(cramer_survival)[tail_mask]
            gra_tail = np.array(granville_survival)[tail_mask]
            # L2 tail error
            cra_error = np.sum((emp_tail - cra_tail) ** 2)
            gra_error = np.sum((emp_tail - gra_tail) ** 2)
        else:
            cra_error = gra_error = 0

        # Fit to Cramér model P(ratio > λ) ~ exp(-λ/c)
        valid = np.array(empirical_survival) > 0.001
        if np.sum(valid) > 5:
            log_emp = -np.log(np.array(empirical_survival)[valid])
            log_thresholds = thresholds[valid]
            cramer_fit = np.polyfit(log_thresholds, log_emp, 1)
            granville_fit = np.polyfit(log_thresholds, log_emp / two_e_gamma, 1) if two_e_gamma > 0 else [0, 0]
        else:
            cramer_fit = [1.0, 0.0]
            granville_fit = [1.0, 0.0]

        self.results = {
            'limit': limit,
            'n_primes': len(self.primes),
            'n_gaps': len(self.gaps),
            'max_ratio': float(np.max(self.ratios)),
            'mean_ratio': float(np.mean(self.ratios)),
            'std_ratio': float(np.std(self.ratios)),
            'max_gap': int(np.max(self.gaps)),
            'mean_gap': float(np.mean(self.gaps)),
            'thresholds': thresholds.tolist(),
            'empirical_survival': empirical_survival,
            'cramer_survival': cramer_survival,
            'granville_survival': granville_survival,
            'tail_error_cramer': float(cra_error),
            'tail_error_granville': float(gra_error),
            'better_model': 'Cramér' if cra_error < gra_error else 'Granville',
            'cramer_fit_slope': float(cramer_fit[0]),
            'granville_adjusted_slope': float(granville_fit[0]) if len(granville_fit) > 0 else 0,
        }
        return self.results

    def summary(self) -> str:
        r = self.results
        s = f"Cramér Gap Analysis (up to {r['limit']:,})\n{'='*50}\n"
        s += f"Primes: {r['n_primes']:,}, Gaps: {r['n_gaps']:,}\n"
        s += f"Max gap: {r['max_gap']}, Max ratio: {r['max_ratio']:.4f}\n"
        s += f"Mean gap: {r['mean_gap']:.2f}, Mean ratio: {r['mean_ratio']:.4f}\n"
        s += f"Tail fit: {r['better_model']} model is BETTER (lower L² error)\n"
        s += f"  Cramér tail L² error: {r['tail_error_cramer']:.6f}\n"
        s += f"  Granville tail L² error: {r['tail_error_granville']:.6f}\n"
        s += f"Note: Max observed ratio {r['max_ratio']:.4f} < 0.921 (record), insufficient to discriminate.\n"
        return s