File size: 4,743 Bytes
494c9e4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 | /**
* Token 文本显示工具:特殊字符可视化、HTML 转义
* 与 Tooltip、TopK 图表等共享
*/
function escapeHtmlImpl(text: string): string {
const div = document.createElement('div');
div.textContent = text;
return div.innerHTML;
}
function isWhitespaceChar(char: string): boolean {
return /\p{White_Space}/u.test(char);
}
function isPrintableChar(char: string): boolean {
if (isWhitespaceChar(char)) return false;
const codePoint = char.codePointAt(0);
if (codePoint === undefined) return false;
if (codePoint >= 32 && codePoint <= 126) return true;
if (
(codePoint >= 0x00A0 && codePoint <= 0x00FF) ||
(codePoint >= 0x0100 && codePoint <= 0x017F) ||
(codePoint >= 0x0180 && codePoint <= 0x024F) ||
(codePoint >= 0x2000 && codePoint <= 0x206F) ||
(codePoint >= 0x2070 && codePoint <= 0x209F) ||
(codePoint >= 0x20A0 && codePoint <= 0x20CF) ||
(codePoint >= 0x2100 && codePoint <= 0x214F) ||
(codePoint >= 0x2190 && codePoint <= 0x21FF) ||
(codePoint >= 0x2200 && codePoint <= 0x22FF) ||
(codePoint >= 0x2300 && codePoint <= 0x23FF) ||
(codePoint >= 0x2400 && codePoint <= 0x243F) ||
(codePoint >= 0x2E00 && codePoint <= 0x2E7F) ||
(codePoint >= 0x3000 && codePoint <= 0x303F) ||
(codePoint >= 0x3040 && codePoint <= 0x309F) ||
(codePoint >= 0x30A0 && codePoint <= 0x30FF) ||
(codePoint >= 0x4E00 && codePoint <= 0x9FFF) ||
(codePoint >= 0xAC00 && codePoint <= 0xD7AF) ||
(codePoint >= 0xF900 && codePoint <= 0xFAFF) ||
(codePoint >= 0xFF00 && codePoint <= 0xFFEF)
) return true;
return false;
}
/** {@link visualizeSpecialChars} 可选行为 */
export type VisualizeSpecialCharsOptions = {
/**
* 为 true(如 DAG 节点标签):仅当 ASCII 空格**后面不是** `[A-Za-z0-9]` 时改为 ·;
* 空格后是 ASCII 字母或数字时保留空格(便于 SVG 显示词界)。
* 省略或 false:每个 ASCII 空格都变为 ·(与 Tooltip / 候选词等一致)。
*/
spaceDotExceptBeforeAsciiLetterOrNumber?: boolean;
};
function visualizeSpecialCharsImpl(text: string, options?: VisualizeSpecialCharsOptions): string {
let result = text
.replace(/\r\n/g, '[CRLF]')
.replace(/\n/g, '[LF]')
.replace(/\r/g, '[CR]')
.replace(/\t/g, '[TAB]')
.replace(/\u3000/g, '[FS]');
if (options?.spaceDotExceptBeforeAsciiLetterOrNumber === true) {
// 须写成 (?![…]),勿写成 (?!=[…]):后者会解析成「否定先行 + 字面量 = + 字符类」,几乎总匹配,导致所有空格都变 ·。
result = result.replace(/ (?![A-Za-z0-9])/g, '·');
} else {
result = result.replace(/ /g, '·');
}
const processed: string[] = [];
let inBracket = false;
for (let i = 0; i < result.length; i++) {
const char = result[i];
if (char === '[') {
inBracket = true;
processed.push(char);
} else if (char === ']' && inBracket) {
processed.push(char);
inBracket = false;
} else if (inBracket) {
processed.push(char);
} else {
// 保留的空格不能走下方「不可打印 → U+」分支,否则会变成 [U+0020]
if (char === ' ') {
processed.push(char);
} else if (isPrintableChar(char)) {
processed.push(char);
} else {
const codePoint = char.codePointAt(0);
if (codePoint !== undefined) {
const hexCode = codePoint.toString(16).toUpperCase().padStart(4, '0');
processed.push(`[U+${hexCode}]`);
} else {
processed.push(char);
}
}
}
}
return processed.join('');
}
/** 处理候选词文本,与主 token 保持一致:先可视化特殊字符,再 HTML 转义 */
export function processCandidateText(text: string): string {
return escapeHtmlImpl(visualizeSpecialCharsImpl(text));
}
/**
* Tooltip 内展示的当前 token 与合并子片段共用:与 {@link processCandidateText} 同一管线,
* 保证与主栏「当前 token」行渲染一致。
*/
export function tooltipTokenDisplayHtml(text: string): string {
return processCandidateText(text);
}
/** HTML 转义 */
export function escapeHtml(text: string): string {
return escapeHtmlImpl(text);
}
/** 可视化特殊字符 */
export function visualizeSpecialChars(text: string, options?: VisualizeSpecialCharsOptions): string {
return visualizeSpecialCharsImpl(text, options);
}
|