InfoLens / client /src /ts /utils /lognormalFit.ts
dqy08's picture
initial beta release
494c9e4
/**
* 对数正态噪声拟合(纯数学,无依赖)
* 供 visualizationUpdater 使用,可独立在 Node 中测试
*/
export const LN_EPS = 1e-10;
/** 标准正态 CDF Φ(x),Abramowitz & Stegun 26.2.17 近似 */
export function normCdf(x: number): number {
if (x <= -6) return 0;
if (x >= 6) return 1;
const a1 = 0.254829592, a2 = -0.284496736, a3 = 1.421413741, a4 = -1.453152027, a5 = 1.061405429, p = 0.3275911;
const sign = x < 0 ? -1 : 1;
const t = 1 / (1 + p * Math.abs(x) / Math.SQRT2);
const y = 1 - (((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t) * Math.exp(-x * x / 2);
return 0.5 * (1 + sign * y);
}
/** 对数正态 CDF:F(x) = Φ((log(x) - μ) / σ),x > 0 */
export function logNormalCdf(x: number, mu: number, sigma: number): number {
if (x <= 0) return 0;
return normCdf((Math.log(x) - mu) / sigma);
}
/** 区间 [a, b) 在 log-normal(μ,σ) 下的期望计数:n × (CDF(b) - CDF(a)) */
export function logNormalExpectedCountInInterval(
a: number, b: number, n: number, mu: number, sigma: number
): number {
return n * (logNormalCdf(b, mu, sigma) - logNormalCdf(a, mu, sigma));
}
/** 对数正态 PDF:f(x) = φ((log(x)-μ)/σ) / (xσ),x > 0 */
export function logNormalPdf(x: number, mu: number, sigma: number): number {
if (x <= 0 || sigma <= 0) return 0;
const z = (Math.log(x) - mu) / sigma;
return normPdf(z) / (x * sigma);
}
/** 标准正态 PDF φ(x) */
function normPdf(x: number): number {
return Math.exp(-x * x / 2) / Math.sqrt(2 * Math.PI);
}
/** 逆 Mills 比率 λ(α) = φ(α)/Φ(α),α → −∞ 时近似 |α| */
function millsRatio(alpha: number): number {
const Phi = normCdf(alpha);
if (Phi < 1e-300) return Math.abs(alpha);
return normPdf(alpha) / Phi;
}
/**
* 截尾对数正态 MLE(右截尾于 τ)
* 导出供测试对比 tau=max(samples) vs tau=固定值
*/
export function fitLogNormalTruncatedMLE(
noiseScores: number[],
tau: number
): { mu: number; sigma: number } | null {
const n = noiseScores.length;
if (n < 2 || tau <= LN_EPS) return null;
const T = Math.log(tau);
const logData = noiseScores.map(x => Math.log(x));
const ybar = logData.reduce((a, b) => a + b, 0) / n;
const s2 = logData.reduce((a, x) => a + (x - ybar) ** 2, 0) / n;
const s = Math.sqrt(s2);
if (s <= 0 || !isFinite(s)) return null;
const delta = T - ybar;
const F = (alpha: number): number => {
const lam = millsRatio(alpha);
if (!isFinite(lam)) return delta > 0 ? -1 : 1;
const g = alpha + lam;
const h = 1 - lam * g;
if (h <= 0) return NaN;
return g - (delta / s) * Math.sqrt(h);
};
const lo0 = -8, hi0 = delta / s + 8;
const Flo = F(lo0), Fhi = F(hi0);
if (!isFinite(Flo) || !isFinite(Fhi) || Flo * Fhi > 0) return null;
let lo = lo0, hi = hi0, Flo_cur = Flo;
for (let i = 0; i < 60; i++) {
const mid = (lo + hi) / 2;
const Fmid = F(mid);
if (!isFinite(Fmid) || (hi - lo) < 1e-12) break;
if (Flo_cur * Fmid <= 0) { hi = mid; }
else { lo = mid; Flo_cur = Fmid; }
}
const alpha = (lo + hi) / 2;
const lam = millsRatio(alpha);
if (!isFinite(lam)) return null;
const h = 1 - lam * (alpha + lam);
if (h <= 0) return null;
const sigma = s / Math.sqrt(h);
const mu = ybar + sigma * lam;
if (!isFinite(sigma) || sigma <= 0 || !isFinite(mu)) return null;
return { mu, sigma };
}
/*
* todo: 未知原因的偏差现象:
* Monte Carlo 下 E[μ̂] 随 n 减小单调增大(系统性正偏),而非围绕真值的随机波动。
> inforadar@0.1.0 test:lognormal:tau
> npx tsx ts/utils/lognormalFit.tauBoundary.test.ts
=== 截尾对数正态拟合硬指标测试 ===
真实参数: μ=-2, σ=0.8, τ=1
Monte Carlo 500 次,fitLogNormalNoiseExpectedCounts percentile=0.9
n | E[μ̂] E[σ̂] Δμ Δσ
-------|------------------------------
1600 | -1.9977 0.8013 0.0023 0.0013
800 | -1.9950 0.8023 0.0050 0.0023
400 | -1.9910 0.8054 0.0090 0.0054
200 | -1.9851 0.8059 0.0149 0.0059
100 | -1.9722 0.8096 0.0278 0.0096
50 | -1.9541 0.8056 0.0459 0.0056
*/
/**
* 从 (μ, σ) 计算直方图各 bin 的期望计数
*/
export function computeExpectedCounts(
mu: number,
sigma: number,
extent: [number, number],
noBins: number,
n: number
): number[] {
const binWidth = (extent[1] - extent[0]) / noBins;
const expectedCounts: number[] = [];
for (let i = 0; i < noBins; i++) {
const a = extent[0] + i * binWidth;
const b = extent[0] + (i + 1) * binWidth;
const p = logNormalCdf(b, mu, sigma) - logNormalCdf(a, mu, sigma);
expectedCounts.push(n * p);
}
return expectedCounts;
}