/**
 * 信号阈值检测：自动找到「噪声/信号」边界
 *
 * 输入：raw score normed [0,1]
 *
 * API 分层：
 * - `findSignalThreshold`：仅截尾对数正态 + bin 扫描；任一步失败则返回 `null`（无 P90 回退）。
 * - `findSignalThresholdWithLog`：先调 `findSignalThreshold`；成功则原样返回；失败则用「全部有限 score」的 P90 作启发式阈值、
 *   `confidence=0`、`mu=0, sigma=1` 占位、`bins=[]`，并 `console.warn`；若无任何有效分数则 `null`。
 *
 * 算法概要（`findSignalThreshold`）：
 * 1. 预处理：丢弃非有限或 ≤ LN_EPS 的分数；排序后若 n < MIN_SAMPLE_SIZE 则返回 null。
 * 2. 迭代 0：用全部样本（P0=1）拟合截尾对数正态 (μ, σ)，从 startPercentile 分位 bin 起逐 bin 扫描
 *    - 每个 bin [τ_left, τ_right) 左闭右开：obsInBin = 该 bin 内观测计数，expInBin = n × (CDF(τ_right) - CDF(τ_left))
 *    - 纯噪声区：信号样本不在 bin 内 → excess ≈ 0
 *    - 到信号边界：bin 内出现超额样本 → excess 跃升
 *    - 不重叠扫描：bin 边界取相邻点几何均值（对数空间 midpoint），τ_right >= τ_left + MIN_BIN_WIDTH，obsInBin >= MIN_OBSERVED
 *    - 误报概率：cumulativeFalsePositiveProbability = ∏(1-Φ(excess_i))，excess>excessMin 时累积，否则重置
 *    - 当 cumulativeFalsePositiveProbability <= 1-SCAN_SATISFACTION_CONFIDENCE 时，取首次命中 bin 的左边界为阈值（保守）
 *    - 若全程无连续命中链，或链尾仍达不到早停置信度且无有效兜底，evaluateBins 返回 null
 * 3. 迭代 1..N：用 threshold 以下样本重拟合，再扫描；阈值变化不大则提前结束
 * 4. 任一轮出现以下任一情况则整条失败返回 null（不回退）：噪声样本数不足（refinement 时）、拟合失败、扫描无阈值、confidence < MIN_ACCEPTABLE_CONFIDENCE
 *
 * 与现有 lognormalFit 逻辑独立，未来可能替换现有拟合代码
 */

import { quantileSorted } from 'd3-array';
import { fitLogNormalTruncatedMLE, logNormalExpectedCountInInterval, normCdf, LN_EPS } from './lognormalFit';
import { computeFitQuality } from './fitQuality';

/** 扫描置信度阈值，达到此值即判定「确定找到」信号边界；默认 0.99999 */
const SCAN_SATISFACTION_CONFIDENCE = 0.99999;
/** 最小可接受置信度：每轮扫描得到 threshold 后若低于此值则整条失败；与 SCAN_SATISFACTION_CONFIDENCE（扫描早停）不同 */
const MIN_ACCEPTABLE_CONFIDENCE = 0.9;
/** excess 最小阈值，排除无意义随机波动；需 excess > 此值才计为命中 */
const EXCESS_MIN = 0.1;
const MIN_OBSERVED = 1; // 每个 bin 至少 N 个观测
const MIN_BIN_WIDTH = 0.01; // bin 最小宽度；边界取相邻点几何均值（对数空间 midpoint）
const MIN_SAMPLE_SIZE = 20;
const P0 = 1; // 迭代初始的样本拟合比例
const MAX_REFINE_ITER = 10;
const THRESHOLD_CONVERGE_EPS = 0.01; //迭代收敛阈值
/** 扫描起始分位，默认 0.5（从 50% 分位所在 bin 开始） */
const START_PERCENTILE_DEFAULT = 0.5;
/** expInBin 最小有效值，避免除零或数值不稳定 */
const EXP_IN_BIN_EPS = 1e-10;

/** 内部：evaluateBins 的中间结果，仅 threshold + confidence */
interface SignalThresholdScanResult {
    threshold: number;
    confidence: number;
}

/** 对外：findSignalThreshold 成功或 findSignalThresholdWithLog 的 P90 回退 */
export interface signalFitResult {
    threshold: number;
    /** 0~1：成功时为 1-误报概率（≥ MIN_ACCEPTABLE）；P90 回退时为 0 */
    confidence: number;
    /** 成功时为截尾对数正态 μ；P90 回退时为 0（占位，勿用于拟合曲线） */
    mu: number;
    /** 成功时为截尾对数正态 σ；P90 回退时为 1（占位） */
    sigma: number;
    /** 成功时为各 bin 的 expInBin 等；P90 回退为空数组 */
    bins: SignalThresholdBin[];
}

export interface SignalThresholdBin {
    tauLeft: number;
    tauRight: number;
    obsInBin: number;
    expInBin: number;
}

/** 内部：bin 结构（tauLeft/tauRight/obsInBin）仅依赖 sorted，迭代间不变 */
interface BinStructure {
    tauLeft: number;
    tauRight: number;
    obsInBin: number;
}

const TAU_RIGHT_EPSILON = 1e-6;

const PERCENTILE_DIAGNOSTICS = [0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99, 1] as const;

/** P90 回退日志：原文首尾展示长度（UTF-16 码元） */
const FALLBACK_TEXT_HEAD_LEN = 24;
const FALLBACK_TEXT_TAIL_LEN = 24;

function concatTokenRawText(tokens: Array<{ raw?: string }>): string {
    return tokens.map((t) => t.raw ?? '').join('');
}

/** 日志用：总长 ≤ headLen+tailLen+1 时原样返回；否则前 headLen、换行、省略号、换行、后 tailLen */
function formatFallbackTextExcerpt(text: string, headLen: number, tailLen: number): string {
    const maxShort = headLen + tailLen + 1;
    if (text.length <= maxShort) {
        return `${text}`;
    }
    return `${text.slice(0, headLen)}\n……\n${text.slice(-tailLen)}`;
}

/** 计算 excess = (obs - exp) / sqrt(exp)，exp 过小时避免除零 */
function computeExcess(obsInBin: number, expInBin: number): number {
    if (expInBin <= EXP_IN_BIN_EPS) return obsInBin > 0 ? Infinity : 0;
    return (obsInBin - expInBin) / Math.sqrt(expInBin);
}

/** 打印不同分位数下的拟合结果，用于验证渐近一致性 */
function logPercentileDiagnostics(scores: number[]): void {
    const sorted = [...scores].sort((a, b) => a - b);
    const n = sorted.length;
    if (n < 2) return;
    const rows: Array<{ p: number; n: number; mu: number; sigma: number }> = [];
    for (const p of PERCENTILE_DIAGNOSTICS) {
        const pIdx = Math.max(1, Math.min(n, Math.round(n * p)));
        const noiseNorm = sorted.slice(0, pIdx);
        const tau = pIdx < n ? (sorted[pIdx - 1]! + sorted[pIdx]!) / 2 : sorted[pIdx - 1]!;
        const fit = fitLogNormalTruncatedMLE(noiseNorm, tau);
        if (fit) rows.push({ p, n: pIdx, mu: fit.mu, sigma: fit.sigma });
    }
    if (rows.length === 0) return;
    console.log('[signalThreshold] 渐近一致性诊断 (percentile → μ, σ)');
    for (const { p, n, mu, sigma } of rows) {
        console.log(`  p=${p} n=${n}: μ=${mu.toFixed(4)}, σ=${sigma.toFixed(4)}`);
    }
}
/** verbose 时打印完整 bin 扫描日志（独立于 evaluateBins，仅追加输出） */
function printBinScanLogs(bins: SignalThresholdBin[], excessMin: number): void {
    console.log('[signalThreshold] 完整扫描明细 τ_left | τ_right | obsInBin | expInBin | excess | binConf | hit | confidence');
    let cumulativeFalsePositiveProbability = 1;
    let firstHitTauLeft: number | null = null;
    for (const bin of bins) {
        const excess = computeExcess(bin.obsInBin, bin.expInBin);
        const hit = excess > excessMin;
        const binConfidence = normCdf(excess);
        if (hit) {
            if (firstHitTauLeft === null) firstHitTauLeft = bin.tauLeft;
            cumulativeFalsePositiveProbability *= 1 - binConfidence;
            const confidence = 1 - cumulativeFalsePositiveProbability;
            console.log(`[signalThreshold]   ${bin.tauLeft.toFixed(4)} | ${bin.tauRight.toFixed(4)} | ${String(bin.obsInBin).padStart(7)} | ${bin.expInBin.toFixed(1).padStart(8)} | ${excess.toFixed(2).padStart(6)} | ${binConfidence.toFixed(4)} | ✓ | ${confidence.toFixed(4)}`);
        } else {
            cumulativeFalsePositiveProbability = 1;
            firstHitTauLeft = null;
            console.log(`[signalThreshold]   ${bin.tauLeft.toFixed(4)} | ${bin.tauRight.toFixed(4)} | ${String(bin.obsInBin).padStart(7)} | ${bin.expInBin.toFixed(1).padStart(8)} | ${excess.toFixed(2).padStart(6)} | ${binConfidence.toFixed(4)} |   | -`);
        }
    }
}

/** bin 边界取相邻点几何均值（对数空间 midpoint），τ_right >= τ_left + MIN_BIN_WIDTH，obsInBin >= MIN_OBSERVED；仅依赖 sorted，迭代间不变 */
function formBinStructures(sorted: number[]): BinStructure[] {
    const n = sorted.length;
    const mids: number[] = [];
    for (let i = 0; i < n - 1; i++) mids.push(Math.sqrt(sorted[i]! * sorted[i + 1]!));
    const structures: BinStructure[] = [];
    let tauLeft = sorted[0]! - TAU_RIGHT_EPSILON;

    while (tauLeft < sorted[n - 1]!) {
        let midIdx = mids.findIndex((m) => m >= tauLeft + MIN_BIN_WIDTH);
        let tauRight = midIdx >= 0 ? mids[midIdx]! : sorted[n - 1]! + TAU_RIGHT_EPSILON;

        let leftIdx = sorted.findIndex((v) => v >= tauLeft);
        let rightIdx = midIdx >= 0 ? sorted.findIndex((v) => v >= tauRight) : -1;
        let obsInBin = leftIdx < 0 ? 0 : rightIdx < 0 ? n - leftIdx : rightIdx - leftIdx;

        while (obsInBin < MIN_OBSERVED && midIdx >= 0 && midIdx < mids.length - 1) {
            midIdx++;
            tauRight = mids[midIdx]!;
            rightIdx = sorted.findIndex((v) => v >= tauRight);
            obsInBin = leftIdx < 0 ? 0 : rightIdx < 0 ? n - leftIdx : rightIdx - leftIdx;
        }
        if (obsInBin < MIN_OBSERVED) {
            tauRight = sorted[n - 1]! + TAU_RIGHT_EPSILON;
            rightIdx = -1;
            obsInBin = leftIdx < 0 ? 0 : n - leftIdx;
            if (obsInBin < MIN_OBSERVED) break;
        }

        structures.push({ tauLeft, tauRight, obsInBin });
        tauLeft = tauRight;
        if (tauRight >= sorted[n - 1]! + TAU_RIGHT_EPSILON) break;
    }
    return structures;
}

/** 遍历 bin 结构，按需计算 expInBin，返回阈值结果；通过 obsInBin 累积找到 startPercentile 分位对应 bin，从该 bin 开始扫描 */
function evaluateBins(
    structures: BinStructure[],
    n: number,
    mu: number,
    sigma: number,
    excessMin: number,
    confidenceThreshold: number,
    verbose: boolean,
    startPercentile: number
): SignalThresholdScanResult | null {
    let cumulativeFalsePositiveProbability = 1;
    let firstHitTauLeft: number | null = null;

    const K = Math.min(Math.floor((n - 1) * startPercentile), n - 1);
    let cumSum = 0;
    let startIdx = 0;
    for (let i = 0; i < structures.length; i++) {
        if (K < cumSum + structures[i]!.obsInBin) {
            startIdx = i;
            break;
        }
        cumSum += structures[i]!.obsInBin;
    }
    const structuresToScan = structures.slice(startIdx);

    if (verbose) {
        console.log('[signalThreshold] 扫描明细 τ_left | τ_right | obsInBin | expInBin | excess | binConf | hit | confidence');
    }

    for (const s of structuresToScan) {
        const expInBin = logNormalExpectedCountInInterval(s.tauLeft, s.tauRight, n, mu, sigma);
        const excess = computeExcess(s.obsInBin, expInBin);
        const hit = excess > excessMin;
        const binConfidence = normCdf(excess);

        if (hit) {
            if (firstHitTauLeft === null) firstHitTauLeft = s.tauLeft;
            cumulativeFalsePositiveProbability *= 1 - binConfidence;
            const confidence = 1 - cumulativeFalsePositiveProbability;
            if (verbose) {
                console.log(`[signalThreshold]   ${s.tauLeft.toFixed(4)} | ${s.tauRight.toFixed(4)} | ${String(s.obsInBin).padStart(7)} | ${expInBin.toFixed(1).padStart(8)} | ${excess.toFixed(2).padStart(6)} | ${binConfidence.toFixed(4)} | ✓ | ${confidence.toFixed(4)}`);
            }
            if (confidence >= confidenceThreshold) {
                return { threshold: firstHitTauLeft, confidence };
            }
        } else {
            cumulativeFalsePositiveProbability = 1;
            firstHitTauLeft = null;
            if (verbose) {
                console.log(`[signalThreshold]   ${s.tauLeft.toFixed(4)} | ${s.tauRight.toFixed(4)} | ${String(s.obsInBin).padStart(7)} | ${expInBin.toFixed(1).padStart(8)} | ${excess.toFixed(2).padStart(6)} | ${binConfidence.toFixed(4)} |   | -`);
            }
        }
    }

    if (firstHitTauLeft !== null) {
        return { threshold: firstHitTauLeft, confidence: 1 - cumulativeFalsePositiveProbability };
    }
    return null;
}

/**
 * 从 raw score normed 数组自动检测信号阈值（内部会丢弃 ≤ LN_EPS 的样本后再算 n 与拟合）
 * @param rawScoresNormed 归一化分数 [0,1]
 * @param verbose 是否输出详细日志，默认 false
 * @returns 成功时返回完整结果 { threshold, confidence, mu, sigma, bins }；任一轮失败（见文件头）时返回 null
 */
export function findSignalThreshold(
    rawScoresNormed: number[],
    verbose = false
): signalFitResult | null {
    const values = rawScoresNormed.filter(
        (s) => typeof s === 'number' && isFinite(s) && s > LN_EPS
    );
    const sorted = [...values].sort((a, b) => a - b);
    const n = sorted.length;

    if (n < MIN_SAMPLE_SIZE) {
        if (verbose) console.log('[signalThreshold] 样本不足 n<', MIN_SAMPLE_SIZE, '，跳过');
        return null;
    }

    const p0 = P0;
    const splitIdx = Math.max(1, Math.min(n, Math.round(n * p0)));
    if (verbose) console.log('[signalThreshold] n=', n, 'splitIdx=', splitIdx);

    let result: SignalThresholdScanResult | null = null;
    let lastFit = { mu: 0, sigma: 0 };
    const binStructures = formBinStructures(sorted);

    for (let iter = 0; iter <= MAX_REFINE_ITER; iter++) {
        if (iter > 0 && result === null) return null;
        const thresholdForNoise = result?.threshold ?? 0;
        const noiseSamples = iter === 0
            ? sorted.slice(0, splitIdx)
            : sorted.filter((x) => x <= thresholdForNoise);
        const tauBoundary = iter === 0
            ? (splitIdx < n ? (sorted[splitIdx - 1]! + sorted[splitIdx]!) / 2 : sorted[splitIdx - 1]!)
            : thresholdForNoise;

        if (iter > 0 && noiseSamples.length < MIN_SAMPLE_SIZE) {
            if (verbose) console.log('[signalThreshold] 迭代', iter, '失败：噪声样本数<', MIN_SAMPLE_SIZE);
            return null;
        }

        if (verbose && iter === 0) {
            const nInit = noiseSamples.length;
            const minN = noiseSamples[0]!, maxN = noiseSamples[nInit - 1]!;
            const midN = noiseSamples[Math.floor(nInit / 2)]!;
            console.log('[signalThreshold] 迭代 0 噪声样本 n=', nInit, 'min=', minN.toFixed(4), 'max=', maxN.toFixed(4), 'median=', midN.toFixed(4));
        }

        const fit = fitLogNormalTruncatedMLE(noiseSamples, tauBoundary);
        if (fit === null) {
            if (verbose) console.log('[signalThreshold] 迭代', iter, '失败：拟合失败');
            return null;
        }
        lastFit = { mu: fit.mu, sigma: fit.sigma };

        const q = computeFitQuality(noiseSamples, tauBoundary, fit.mu, fit.sigma);
        if (verbose) {
            console.log('[signalThreshold] 迭代', iter, '拟合 μ=', fit.mu.toFixed(4), 'σ=', fit.sigma.toFixed(4), '| maxDiff=', q.maxDiff.toFixed(4), 'RMSE=', q.rmse.toFixed(4));
            if (iter === 0) {
                console.log('[signalThreshold] 迭代', iter, '从', (START_PERCENTILE_DEFAULT * 100).toFixed(0), '% 分位 bin 开始扫描 (excess>', EXCESS_MIN, ', confidence>=', SCAN_SATISFACTION_CONFIDENCE, ')');
            }
        }

        const scanResult = evaluateBins(binStructures, n, fit.mu, fit.sigma, EXCESS_MIN, SCAN_SATISFACTION_CONFIDENCE, verbose, START_PERCENTILE_DEFAULT);
        if (scanResult === null) {
            if (verbose) console.log('[signalThreshold] 迭代', iter, '失败：未检测到阈值');
            return null;
        }

        if (scanResult.confidence < MIN_ACCEPTABLE_CONFIDENCE) {
            console.warn(
                '[signalThreshold] 迭代',
                iter,
                '失败：confidence <',
                MIN_ACCEPTABLE_CONFIDENCE,
                '。当前',
                scanResult.confidence.toFixed(4)
            );
            return null;
        }

        const savedThreshold = result?.threshold;
        result = scanResult;

        if (iter > 0 && savedThreshold !== undefined) {
            const delta = Math.abs(result.threshold - savedThreshold);
            if (verbose) {
                console.log('[signalThreshold] 迭代', iter, '新阈值=', result.threshold.toFixed(4), 'confidence=', result.confidence.toFixed(2), 'delta=', delta.toFixed(6));
            }
            if (delta < THRESHOLD_CONVERGE_EPS) {
                if (verbose) console.log('[signalThreshold] 迭代', iter, '收敛，最终阈值=', result.threshold.toFixed(4));
                break;
            }
            if (iter === MAX_REFINE_ITER && verbose) {
                console.log('[signalThreshold] 达到最大迭代次数，最终阈值=', result.threshold.toFixed(4));
            }
        } else if (verbose) {
            console.log('[signalThreshold] 迭代 0 检测到阈值', result.threshold.toFixed(4), 'confidence=', result.confidence.toFixed(2));
        }
    }

    const bins: SignalThresholdBin[] = binStructures.map((s) => ({
        ...s,
        expInBin: logNormalExpectedCountInInterval(s.tauLeft, s.tauRight, n, lastFit.mu, lastFit.sigma),
    }));
    if (verbose && bins.length > 0) {
        printBinScanLogs(bins, EXCESS_MIN);
        logPercentileDiagnostics(values);
    }
    if (result === null) return null;
    return { ...result, mu: lastFit.mu, sigma: lastFit.sigma, bins };
}

/** 读取 window.signalThresholdVerbose，默认 false */
function getVerboseFromWindow(): boolean {
    return !!(typeof window !== 'undefined' && (window as Window & { signalThresholdVerbose?: boolean }).signalThresholdVerbose);
}

/**
 * findSignalThreshold 的封装：调用后打印 [signalThreshold] 日志并返回结果。
 * 检测失败时返回 P90 分位为阈值的启发式结果（confidence=0，与成功拟合的 confidence≥MIN_ACCEPTABLE 区分），无有效分数时返回 null。
 */
export function findSignalThresholdWithLog(
    tokens: Array<{ score: number; raw?: string }>,
    verbose = getVerboseFromWindow()
): signalFitResult | null {
    const rawScoresNormed = tokens.map(t => t.score).filter((s): s is number => typeof s === 'number' && Number.isFinite(s));
    if (rawScoresNormed.length === 0) {
        console.warn('[signalThreshold] 无有效分数，跳过阈值');
        return null;
    }

    const result = findSignalThreshold(rawScoresNormed, verbose);
    if (result !== null) {
        if (verbose) {
            const t = result.threshold;
            const below = rawScoresNormed.filter((s) => s < t).length;
            const quantile = below / rawScoresNormed.length;
            console.log(
                '[signalThreshold]',
                `threshold=${t.toFixed(4)} confidence=${result.confidence.toFixed(2)} (quantile=${quantile.toFixed(4)}, ${below}/${rawScoresNormed.length} below) μ=${result.mu.toFixed(4)} σ=${result.sigma.toFixed(4)}`
            );
        }
        return result;
    }

    const sorted = [...rawScoresNormed].sort((a, b) => a - b);
    const p90 = quantileSorted(sorted, 0.9);
    const below = rawScoresNormed.filter((s) => s < p90).length;
    const quantile = below / rawScoresNormed.length;

    const rawText = concatTokenRawText(tokens);
    const textHint =
        rawText.length > 0
            ? ` | ${formatFallbackTextExcerpt(rawText, FALLBACK_TEXT_HEAD_LEN, FALLBACK_TEXT_TAIL_LEN)}`
            : '';
    console.warn(
        `[signalThreshold] 自动阈值检测失败，已使用 P90 分位作为启发式阈值（confidence=0）${textHint}`
    );
    if (verbose) {
        console.log(
            '[signalThreshold]',
            `threshold=${p90.toFixed(4)} (P90 fallback) confidence=0.00 (quantile=${quantile.toFixed(4)}, ${below}/${rawScoresNormed.length} below) 无截尾对数正态拟合`
        );
    }

    return {
        threshold: p90,
        confidence: 0,
        mu: 0,
        sigma: 1,
        bins: [],
    };
}