| import type { FrontendAnalyzeResult } from '../api/GLTR_API'; |
| import { calculateSurprisal, calculateSurprisalDensity } from './Util'; |
| import { extractRealTopkFromTokens } from './tokenUtils'; |
|
|
| |
/**
 * Decide whether `value` falls into the histogram bin described by `[x0, x1)`.
 *
 * The outermost bins are open-ended so that no value is left without a bin:
 * the first bin has no lower bound, the last bin has no upper bound, and
 * interior bins are half-open (`x0` inclusive, `x1` exclusive).
 *
 * @param value    The value to classify.
 * @param x0       Lower edge of the bin.
 * @param x1       Upper edge of the bin.
 * @param binIndex Zero-based index of the bin.
 * @param no_bins  Total number of bins in the histogram.
 * @returns `true` when `value` belongs to the bin; always `false` for NaN.
 */
function valueInBinRange(value: number, x0: number, x1: number, binIndex: number, no_bins: number): boolean {
  const isFirstBin = binIndex === 0;
  const isLastBin = binIndex === no_bins - 1;
  // A single-bin histogram is both first and last, so it is unbounded on both
  // sides. Without this case the first-bin rule would win and reject every
  // value >= x1 (including the maximum, which sits exactly on x1).
  // NaN is still rejected, matching the comparison-based branches below.
  if (isFirstBin && isLastBin) return !Number.isNaN(value);
  if (isFirstBin) return value < x1;
  if (isLastBin) return value >= x0;
  return value >= x0 && value < x1;
}
|
|
// Names the histogram a bin selection came from; calculateHighlights
// dispatches on this value to pick the matching highlight calculation.
| export type HistogramType = 'token' | 'byte' | 'raw_score_normed'; |
// Analysis result extended with optional numeric arrays used for highlighting.
| export type HighlightData = FrontendAnalyzeResult & { |
// Read by calculateRawScoreNormedHighlights; positions in this array become
// the highlighted indices.
| rawScoresNormed?: number[]; |
// NOTE(review): the arrays below are not read anywhere in this section —
// confirm their meaning and indexing against the code that produces them.
| attentionRawScores?: number[]; |
| signalProbs?: number[]; |
| pPwValues?: number[]; |
| pwScores?: number[]; |
| }; |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Collect the indices of merged tokens whose surprisal lands in the selected
 * histogram bin.
 *
 * For every position in `data.bpeBpeMergedTokens`, the surprisal is computed
 * with `calculateSurprisal` from element `[1]` of the matching entry returned
 * by `extractRealTopkFromTokens` (presumably the token's probability — confirm
 * against `tokenUtils`). Positions with a non-finite surprisal are skipped.
 *
 * @param x0       Lower edge of the selected bin.
 * @param x1       Upper edge of the selected bin.
 * @param binIndex Zero-based index of the selected bin.
 * @param no_bins  Total number of bins in the histogram.
 * @param data     Analysis data providing `bpeBpeMergedTokens`.
 * @returns Set of token indices to highlight; empty when there are no tokens.
 */
export function calculateTokenSurprisalHighlights(
  x0: number,
  x1: number,
  binIndex: number,
  no_bins: number,
  data: HighlightData
): Set<number> {
  const selected = new Set<number>();
  const tokens = data.bpeBpeMergedTokens;
  if (!tokens || tokens.length === 0) {
    return selected;
  }

  const realTopk = extractRealTopkFromTokens(tokens);
  for (let idx = 0; idx < tokens.length; idx += 1) {
    const tokenSurprisal = calculateSurprisal(realTopk[idx][1]);
    const inSelectedBin =
      Number.isFinite(tokenSurprisal) && valueInBinRange(tokenSurprisal, x0, x1, binIndex, no_bins);
    if (inSelectedBin) {
      selected.add(idx);
    }
  }
  return selected;
}
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Collect the indices of merged tokens whose surprisal density lands in the
 * selected histogram bin.
 *
 * The density of each entry in `data.bpeBpeMergedTokens` is obtained from
 * `calculateSurprisalDensity`; entries with a non-finite density are skipped.
 *
 * @param x0       Lower edge of the selected bin.
 * @param x1       Upper edge of the selected bin.
 * @param binIndex Zero-based index of the selected bin.
 * @param no_bins  Total number of bins in the histogram.
 * @param data     Analysis data providing `bpeBpeMergedTokens`.
 * @returns Set of token indices to highlight; empty when there are no tokens.
 */
export function calculateByteSurprisalHighlights(
  x0: number,
  x1: number,
  binIndex: number,
  no_bins: number,
  data: HighlightData
): Set<number> {
  const selected = new Set<number>();
  const tokens = data.bpeBpeMergedTokens;
  if (!tokens || tokens.length === 0) {
    return selected;
  }

  for (const [idx, token] of tokens.entries()) {
    const density = calculateSurprisalDensity(token);
    if (Number.isFinite(density) && valueInBinRange(density, x0, x1, binIndex, no_bins)) {
      selected.add(idx);
    }
  }
  return selected;
}
|
|
| |
| |
| |
| |
/**
 * Collect the indices of `data.rawScoresNormed` whose value lands in the
 * selected histogram bin. Non-finite scores are skipped.
 *
 * @param x0       Lower edge of the selected bin.
 * @param x1       Upper edge of the selected bin.
 * @param binIndex Zero-based index of the selected bin.
 * @param no_bins  Total number of bins in the histogram.
 * @param data     Analysis data providing the optional `rawScoresNormed` array.
 * @returns Set of indices to highlight; empty when the scores are missing or empty.
 */
export function calculateRawScoreNormedHighlights(
  x0: number,
  x1: number,
  binIndex: number,
  no_bins: number,
  data: HighlightData
): Set<number> {
  const selected = new Set<number>();
  const normedScores = data.rawScoresNormed;
  if (!normedScores || normedScores.length === 0) {
    return selected;
  }

  normedScores.forEach((value, idx) => {
    if (Number.isFinite(value) && valueInBinRange(value, x0, x1, binIndex, no_bins)) {
      selected.add(idx);
    }
  });
  return selected;
}
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Entry point: compute the highlight indices for a selected histogram bin and
 * report how they should be rendered.
 *
 * - `'byte'` uses `calculateByteSurprisalHighlights`, style `'underline'`.
 * - `'raw_score_normed'` uses `calculateRawScoreNormedHighlights`, style `'underline'`.
 * - Anything else (i.e. `'token'`) uses `calculateTokenSurprisalHighlights`, style `'border'`.
 *
 * @param histogramType Which histogram the selected bin belongs to.
 * @param x0            Lower edge of the selected bin.
 * @param x1            Upper edge of the selected bin.
 * @param binIndex      Zero-based index of the selected bin.
 * @param no_bins       Total number of bins in the histogram.
 * @param data          Analysis data passed through to the selected calculation.
 * @returns The indices to highlight together with the highlight style.
 */
export function calculateHighlights(
  histogramType: HistogramType,
  x0: number,
  x1: number,
  binIndex: number,
  no_bins: number,
  data: HighlightData
): { indices: Set<number>; style: 'border' | 'underline' } {
  switch (histogramType) {
    case 'byte': {
      const indices = calculateByteSurprisalHighlights(x0, x1, binIndex, no_bins, data);
      return { indices, style: 'underline' };
    }
    case 'raw_score_normed': {
      const indices = calculateRawScoreNormedHighlights(x0, x1, binIndex, no_bins, data);
      return { indices, style: 'underline' };
    }
    default: {
      // Token surprisal is the fallback for every other histogram type.
      const indices = calculateTokenSurprisalHighlights(x0, x1, binIndex, no_bins, data);
      return { indices, style: 'border' };
    }
  }
}
|
|
|
|