| import type { FrontendAnalyzeResult, FrontendToken } from '../api/GLTR_API'; |
| import { TokenPositionCalculator } from '../vis/TokenPositionCalculator'; |
| import { ZERO_WIDTH_FRAGMENT_PLACEHOLDER_PX } from '../vis/types'; |
| import type { TokenFragmentRect } from '../vis/types'; |
| import { visualizeSpecialChars } from '../utils/tokenDisplayUtils'; |
| import type { PromptTokenSpan } from './genAttributeDagPreprocess'; |
|
|
/**
 * Box describing where one token sits in the measured text layout.
 *
 * Field notes below describe how `geomFromTokenFragments` in this module fills the
 * box; other producers (if any) are not visible from here.
 */
export type GenAttrDagTokenGeom = {
  /** `x` of the first fragment that contributes to the geometry. */
  originX: number;
  /** `y` of that same first fragment. */
  originY: number;
  /** Summed fragment widths, raised to a minimum that is never below 1. */
  width: number;
  /** Height of the first contributing fragment, never below 1. */
  height: number;
};
|
|
/**
 * Turns a token offset pair into the string key used for geometry lookups.
 *
 * @param off Two-element offset tuple.
 * @returns Both numbers joined by an underscore, e.g. `[3, 7]` becomes `"3_7"`.
 */
function offsetKey(off: [number, number]): string {
  const start = off[0];
  const end = off[1];
  return String(start) + '_' + String(end);
}
|
|
/**
 * Collects the layout fragments that belong to one token.
 *
 * @param positions Fragments for any number of tokens; this array is not modified.
 * @param tokenIndex Index of the token whose fragments are wanted.
 * @returns A new array holding only the matching fragments, ordered by ascending
 *   `fragmentIndex`. Empty when the token has no fragment in `positions`.
 */
function fragmentsForToken(
  positions: TokenFragmentRect[],
  tokenIndex: number
): TokenFragmentRect[] {
  const matching: TokenFragmentRect[] = [];
  for (const rect of positions) {
    if (rect.tokenIndex === tokenIndex) {
      matching.push(rect);
    }
  }
  // Sorting the freshly built array keeps the caller's `positions` untouched.
  return matching.sort((lhs, rhs) => lhs.fragmentIndex - rhs.fragmentIndex);
}
|
|
| |
| |
| |
| |
/**
 * Checks whether `raw` contains at least one of: a C0 control character
 * (U+0000–U+001F), DEL (U+007F), NEL (U+0085), LINE SEPARATOR (U+2028),
 * PARAGRAPH SEPARATOR (U+2029) or IDEOGRAPHIC SPACE (U+3000).
 *
 * NOTE(review): presumably these are the characters whose visualised label is wider
 * than the character itself — confirm against `visualizeSpecialChars`.
 *
 * @param raw Raw token text.
 * @returns `true` when any such character occurs anywhere in `raw`.
 */
function hasExpandingSpecialChar(raw: string): boolean {
  const expandingChars = /[\x00-\x1f\x7f\u0085\u2028\u2029\u3000]/;
  return expandingChars.test(raw);
}
|
|
| |
| |
| |
| |
/**
 * Rough minimum width, in pixels, for the visualised label of `raw`.
 *
 * The label is produced by `visualizeSpecialChars` (with
 * `spaceDotExceptBeforeAsciiLetterOrNumber` enabled); its length is counted in code
 * points and multiplied by a fixed per-character width.
 *
 * @param raw Raw token text.
 * @returns The estimate, never less than 1.
 */
function estimateExpandedLabelWidthFloorPx(raw: string): number {
  // Fixed per-character width used for the estimate; not a measured value.
  const APPROX_CHAR_WIDTH_PX = 10;
  const label = visualizeSpecialChars(raw, {
    spaceDotExceptBeforeAsciiLetterOrNumber: true,
  });
  // Array.from walks the string by code point, so a surrogate pair counts once.
  const codePoints = Array.from(label);
  const estimate = codePoints.length * APPROX_CHAR_WIDTH_PX;
  return Math.max(estimate, 1);
}
|
|
| |
/**
 * Checks whether `raw` is non-empty and made up exclusively of line-break characters:
 * LF, CR, NEL (U+0085), LINE SEPARATOR (U+2028) or PARAGRAPH SEPARATOR (U+2029).
 *
 * @param raw Raw token text.
 * @returns `true` only when every character of `raw` is one of those; `false` for `""`.
 */
function isLineBreakOnlyToken(raw: string): boolean {
  const lineBreaksOnly = /^[\n\r\u0085\u2028\u2029]+$/;
  return lineBreaksOnly.test(raw);
}
|
|
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Chooses which of a token's fragments take part in the geometry computation.
 *
 * - Line-break-only tokens keep every fragment.
 * - Other tokens keep only fragments with a positive width, unless none has one,
 *   in which case the full list is returned so the result is never emptier than
 *   the input.
 *
 * @param frags Fragments of one token.
 * @param raw Raw text of that token.
 * @returns Either `frags` itself or a new array with the positive-width subset.
 */
function fragmentsForDagGeom(
  frags: TokenFragmentRect[],
  raw: string
): TokenFragmentRect[] {
  if (!isLineBreakOnlyToken(raw)) {
    const positiveWidth = frags.filter((frag) => frag.width > 0);
    if (positiveWidth.length > 0) {
      return positiveWidth;
    }
  }
  return frags;
}
|
|
| |
/**
 * Width a fragment contributes to the token geometry.
 *
 * @param frag Layout fragment.
 * @returns `frag.width` when it is positive, otherwise
 *   `ZERO_WIDTH_FRAGMENT_PLACEHOLDER_PX`.
 */
function widthForDagGeom(frag: TokenFragmentRect): number {
  if (frag.width > 0) {
    return frag.width;
  }
  return ZERO_WIDTH_FRAGMENT_PLACEHOLDER_PX;
}
|
|
| |
| |
| |
| |
/**
 * Derives a single geometry box for a token from its layout fragments.
 *
 * The box is anchored at the first fragment selected by `fragmentsForDagGeom`. Its
 * width is the sum of the selected fragments' widths (via `widthForDagGeom`), raised
 * to a floor: the estimated label width when `raw` contains an expanding special
 * character, otherwise 1. Its height is the anchor fragment's height, at least 1.
 *
 * @param frags Fragments of one token; must not be empty.
 * @param raw Raw text of that token.
 * @returns The token's geometry box.
 * @throws Error when `frags` is empty.
 */
function geomFromTokenFragments(frags: TokenFragmentRect[], raw: string): GenAttrDagTokenGeom {
  if (frags.length === 0) {
    throw new Error('genAttributeDagTextMeasure: geomFromTokenFragments called with no fragments');
  }

  // Non-empty input guarantees a non-empty selection, so index 0 exists.
  const selected = fragmentsForDagGeom(frags, raw);
  const anchor = selected[0]!;

  let measuredWidth = 0;
  for (const frag of selected) {
    measuredWidth += widthForDagGeom(frag);
  }

  const minWidth = hasExpandingSpecialChar(raw) ? estimateExpandedLabelWidthFloorPx(raw) : 1;

  return {
    originX: anchor.x,
    originY: anchor.y,
    width: Math.max(measuredWidth, minWidth),
    height: Math.max(anchor.height, 1),
  };
}
|
|
/**
 * Wraps a text and its token list in a `FrontendAnalyzeResult`.
 *
 * `bpe_strings` is passed through by reference, while `originalTokens` and
 * `bpeBpeMergedTokens` each receive their own shallow copies of every token.
 * `model` and `error` are set to `null`.
 *
 * @param originalText Full text the tokens refer to.
 * @param bpe_strings Tokens covering `originalText`.
 * @returns The assembled result object.
 */
function buildAnalyzeResult(
  originalText: string,
  bpe_strings: FrontendToken[]
): FrontendAnalyzeResult {
  // Each call yields a fresh array of fresh token objects (one level deep).
  const shallowCopies = (): FrontendToken[] => bpe_strings.map((tok) => ({ ...tok }));

  const originalTokens = shallowCopies();
  const bpeBpeMergedTokens = shallowCopies();

  return {
    model: null,
    error: null,
    bpe_strings,
    originalTokens,
    bpeBpeMergedTokens,
    originalText,
  };
}
|
|
| |
| |
| |
| |
/**
 * Creates a stateful text measurer backed by a DOM element.
 *
 * The measurer keeps the full text seen so far (prompt plus generated tokens) and the
 * matching token list. For every measurement it replaces the children of
 * `measureRoot` with a single text node holding that text and asks a
 * `TokenPositionCalculator` bound to `measureRoot` for fragment rectangles.
 *
 * NOTE(review): `measureRoot` presumably has to be attached to the document and
 * styled like the real text for the rectangles to be meaningful — confirm with
 * `TokenPositionCalculator`.
 *
 * @param measureRoot Element whose children are overwritten on every measurement.
 * @returns An object with:
 *   - `reset()` — forgets all text and tokens and empties `measureRoot`.
 *   - `setPrompt(promptText, spans)` — replaces the state with the prompt and returns
 *     a map from offset key (`"start_end"`) to geometry for every prompt token. If two
 *     spans share an offset, the later one wins. Throws `Error` when a token has no
 *     layout fragment.
 *   - `appendGeneratedToken(token, offset)` — appends one token to the text and
 *     returns its geometry. Throws `Error` when the token has no layout fragment; the
 *     token stays appended in that case.
 */
export function createGenAttributeDagTextMeasure(measureRoot: HTMLElement): {
  reset(): void;
  setPrompt(promptText: string, spans: PromptTokenSpan[]): Map<string, GenAttrDagTokenGeom>;
  appendGeneratedToken(token: string, offset: [number, number]): GenAttrDagTokenGeom;
} {
  let fullText = '';
  let bpeStrings: FrontendToken[] = [];
  const calculator = new TokenPositionCalculator(measureRoot);

  /** Removes every child node of the measuring element. */
  function clearMeasureRoot(): void {
    while (measureRoot.firstChild) measureRoot.removeChild(measureRoot.firstChild);
  }

  /** Replaces the measuring element's content with one text node and resets the calculator index. */
  function setMeasureText(text: string): void {
    clearMeasureRoot();
    measureRoot.appendChild(document.createTextNode(text));
    calculator.resetIndex();
  }

  /** Renders the current text and returns fragment positions starting at `fromTokenIndex`. */
  function measureFrom(fromTokenIndex: number): TokenFragmentRect[] {
    setMeasureText(fullText);
    const rd = buildAnalyzeResult(fullText, bpeStrings);
    return calculator.calculateTokenPositions(rd, fromTokenIndex);
  }

  /**
   * Buckets fragments by token index in one pass, so looking up each token's
   * fragments does not rescan the whole list.
   */
  function groupFragmentsByToken(
    positions: TokenFragmentRect[]
  ): Map<number, TokenFragmentRect[]> {
    const byToken = new Map<number, TokenFragmentRect[]>();
    for (const rect of positions) {
      const bucket = byToken.get(rect.tokenIndex);
      if (bucket === undefined) {
        byToken.set(rect.tokenIndex, [rect]);
      } else {
        bucket.push(rect);
      }
    }
    return byToken;
  }

  return {
    reset(): void {
      fullText = '';
      bpeStrings = [];
      clearMeasureRoot();
      calculator.resetIndex();
    },

    setPrompt(promptText: string, spans: PromptTokenSpan[]): Map<string, GenAttrDagTokenGeom> {
      fullText = promptText;
      bpeStrings = spans.map((s) => ({
        offset: s.offset,
        raw: s.raw,
        pred_topk: [],
      }));

      const fragmentsByToken = groupFragmentsByToken(measureFrom(0));
      const out = new Map<string, GenAttrDagTokenGeom>();

      for (const [index, tok] of bpeStrings.entries()) {
        const frags = fragmentsByToken.get(index);
        if (frags === undefined) {
          throw new Error(
            `genAttributeDagTextMeasure: no layout fragment for prompt token index ${index} ` +
              `(${offsetKey(tok.offset)})`
          );
        }
        // Buckets are private arrays, so sorting in place does not affect the calculator's output.
        frags.sort((a, b) => a.fragmentIndex - b.fragmentIndex);
        out.set(offsetKey(tok.offset), geomFromTokenFragments(frags, tok.raw));
      }
      return out;
    },

    appendGeneratedToken(token: string, offset: [number, number]): GenAttrDagTokenGeom {
      fullText += token;
      bpeStrings.push({ offset, raw: token, pred_topk: [] });

      // Only the newly appended (last) token needs measuring.
      const from = bpeStrings.length - 1;
      const frags = fragmentsForToken(measureFrom(from), from);
      if (frags.length === 0) {
        throw new Error(
          `genAttributeDagTextMeasure: no layout fragment for generated token (${offsetKey(offset)})`
        );
      }
      return geomFromTokenFragments(frags, token);
    },
  };
}
|
|