| import { |
| readStoredEffectiveExcludeGeneratedPatternsText, |
| readStoredEffectiveExcludePromptPatternsText, |
| } from './attributionExcludePromptPatternsStorage'; |
| import { |
| collectExcludeRegexMatchIntervals, |
| isOffsetSpanFullyExcluded, |
| } from './attributionDisplayModel'; |
| import type { NodeAggregatedEntry } from './genAttributeDagIntervalResolve'; |
| import type { TokenGenStep } from './tokenGenAttributionRunner'; |
| import { getAttentionRawScore } from '../utils/semanticUtils'; |
| import { DAG_EDGE_MIN_DISPLAY_OPACITY } from './genAttributeDagEdgeDisplay'; |
|
|
| |
/**
 * A unique prompt token span referenced by generation-step attribution.
 * `offset` is a pair of character offsets into the step context
 * (presumably [start, end) — TODO confirm convention with producers).
 * `token_id` is optional and is NOT populated by extractPromptTokenSpans
 * in this module.
 */
export type PromptTokenSpan = {
  offset: [number, number];
  raw: string;

  token_id?: number;
};
|
|
| |
| |
| const DAG_EDGE_TOP_N = 10; |
|
|
| |
| export const DAG_EDGE_TOP_P_COVERAGE_DEFAULT = 0.7; |
| const DAG_EDGE_TOP_P_COVERAGE_MIN = 0.05; |
| const DAG_EDGE_TOP_P_COVERAGE_MAX = 1; |
|
|
| export function clampDagEdgeTopPCoverage(n: number): number { |
| if (!Number.isFinite(n)) return DAG_EDGE_TOP_P_COVERAGE_DEFAULT; |
| return Math.min(DAG_EDGE_TOP_P_COVERAGE_MAX, Math.max(DAG_EDGE_TOP_P_COVERAGE_MIN, n)); |
| } |
|
|
| |
| |
| |
| |
| function selectTopNByScore<T extends { score: number }>(effective: T[], n: number): T[] { |
| effective.sort((a, b) => b.score - a.score); |
| return effective.slice(0, Math.min(n, effective.length)); |
| } |
|
|
| |
| type DagPoolNormRow<T> = T & { score: number; rawScore: number; poolMassFrac: number }; |
|
|
| |
| function normalizeTopNPoolForDagSparse<T extends { score: number }>(tokens: T[]): Array<DagPoolNormRow<T>> { |
| const max = Math.max(0, ...tokens.map((t) => t.score).filter(Number.isFinite)); |
| const positiveMass = tokens.map((t) => { |
| const s = t.score; |
| return Number.isFinite(s) ? Math.max(0, s) : 0; |
| }); |
| const massSum = positiveMass.reduce((a, v) => a + v, 0); |
| return tokens.map((t, i) => { |
| const rawScore = getAttentionRawScore(t); |
| const poolMassFrac = massSum > 0 ? positiveMass[i]! / massSum : 0; |
| const scoreNorm = max <= 0 ? t.score : t.score / max; |
| return { ...t, score: scoreNorm, rawScore, poolMassFrac }; |
| }); |
| } |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| function selectTokenAttributionByCumulativeShare<T extends { poolMassFrac: number }>( |
| normalized: Array<T>, |
| cumulativeShareThreshold: number, |
| ): Array<T> { |
| if (normalized.length === 0) return []; |
|
|
| const topFrac = normalized[0]?.poolMassFrac ?? 0; |
| if (!(topFrac > 0)) return []; |
| const relativeFloor = DAG_EDGE_MIN_DISPLAY_OPACITY * topFrac; |
|
|
| let cum = 0; |
| const picked: Array<T> = []; |
| for (const t of normalized) { |
| const frac = t.poolMassFrac; |
| if (!(frac > 0)) { |
| break; |
| } |
| if (frac < relativeFloor) { |
| break; |
| } |
| picked.push(t); |
| cum += frac; |
| if (cum >= cumulativeShareThreshold) { |
| break; |
| } |
| } |
|
|
| return picked; |
| } |
|
|
| |
| |
| |
| |
| export function extractPromptTokenSpans(step: TokenGenStep): PromptTokenSpan[] { |
| const ta = step.response.token_attribution; |
| if (!ta?.length) return []; |
|
|
| const byKey = new Map<string, PromptTokenSpan>(); |
| for (const t of ta) { |
| const k = `${t.offset[0]}_${t.offset[1]}`; |
| if (!byKey.has(k)) { |
| byKey.set(k, { offset: t.offset, raw: t.raw }); |
| } |
| } |
| return [...byKey.values()]; |
| } |
|
|
| |
| export function collectGenAttrDagExcludeIntervals( |
| intervalCtx: string, |
| promptRegionEnd: number, |
| ): [number, number][] { |
| const pe = promptRegionEnd; |
| return [ |
| ...collectExcludeRegexMatchIntervals(intervalCtx, readStoredEffectiveExcludePromptPatternsText(), { |
| start: 0, |
| end: pe, |
| }), |
| ...collectExcludeRegexMatchIntervals(intervalCtx, readStoredEffectiveExcludeGeneratedPatternsText(), { |
| start: pe, |
| end: intervalCtx.length, |
| }), |
| ]; |
| } |
|
|
| |
| |
| |
| |
| |
| |
| |
| export function excludeNodeAggregatedEntries( |
| step: TokenGenStep, |
| entries: NodeAggregatedEntry[], |
| excludeIntervalContext?: string, |
| ): NodeAggregatedEntry[] { |
| if (!entries.length) return []; |
|
|
| const pe = step.promptRegionEnd; |
| const intervalCtx = excludeIntervalContext ?? step.context; |
| const excludeIntervals = collectGenAttrDagExcludeIntervals(intervalCtx, pe); |
| return entries.map((t) => { |
| const [ts, te] = t.offset; |
| const excluded = isOffsetSpanFullyExcluded(ts, te, excludeIntervals); |
| return { |
| ...t, |
| score: excluded ? 0 : t.score, |
| }; |
| }); |
| } |
|
|
| |
| export function phase2RankAndSparsify<T extends { score: number }>( |
| entries: T[], |
| options?: { cumulativeShare?: number }, |
| ): Array<T & { score: number; rawScore: number; poolMassFrac: number }> { |
| if (!entries.length) return []; |
| const topNPool = selectTopNByScore(entries, DAG_EDGE_TOP_N); |
| const normalized = normalizeTopNPoolForDagSparse(topNPool); |
| const threshold = |
| options?.cumulativeShare !== undefined |
| ? clampDagEdgeTopPCoverage(options.cumulativeShare) |
| : DAG_EDGE_TOP_P_COVERAGE_DEFAULT; |
| return selectTokenAttributionByCumulativeShare(normalized, threshold); |
| } |
|
|