File size: 4,991 Bytes
494c9e4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
/**
 * Unit tests for charIndexForByteLimit.
 * Run: cd client/src && npx tsx ts/utils/charIndexForByteLimit.test.ts
 */
import { charIndexForByteLimit } from "./semanticUtils";

// Tallies bumped by the assertion helper and reported once the script finishes.
let failed = 0;
let passed = 0;

/**
 * Minimal assertion helper for this script.
 *
 * Compares `actual` with `expected` using strict equality, prints a one-line
 * result, and increments the matching module-level counter (`passed` or
 * `failed`). It never throws, so one failing case does not stop the run.
 *
 * @param desc - Label printed next to the result mark.
 * @param actual - Value produced by the code under test.
 * @param expected - Value the case requires.
 */
function assert(desc: string, actual: number, expected: number): void {
    if (actual === expected) {
        // U+2713 CHECK MARK marks a passing case (stdout).
        console.log(`  ✓ ${desc}`);
        passed++;
    } else {
        // U+2717 BALLOT X marks a failing case (stderr), followed by the mismatch.
        console.error(`  ✗ ${desc} — expected ${expected}, got ${actual}`);
        failed++;
    }
}

// One shared encoder instance; TextEncoder always emits UTF-8.
const enc = new TextEncoder();

/** Number of bytes `s` occupies once encoded as UTF-8. */
function bytes(s: string) {
    const utf8 = enc.encode(s);
    return utf8.length;
}

// ── 1. Basic boundaries ──────────────────────────────────────────────────────
// Arguments are (text, start, limit), as the labels in the later sections name them.
console.log("1. 基本边界");
// Empty text: the expected index is 0 whatever the limit is.
assert("空字符串,limit=0",        charIndexForByteLimit("", 0, 0), 0);
assert("空字符串,limit=10",       charIndexForByteLimit("", 0, 10), 0);
// A zero byte limit is expected to stop right at `start`.
assert("limit=0 时立刻停止",       charIndexForByteLimit("abc", 0, 0), 0);
// Limit equal to, then larger than, the 3 bytes of "abc": the whole text is expected.
assert("limit 恰好等于全文字节数", charIndexForByteLimit("abc", 0, 3), 3);
assert("limit 大于全文字节数",     charIndexForByteLimit("abc", 0, 100), 3);

// ── 2. Pure ASCII (1 byte per character) ─────────────────────────────────────
console.log("2. 纯 ASCII");
// With 1 byte per character, the expected index is simply start + limit.
assert("limit=1 取 1 字符",  charIndexForByteLimit("hello", 0, 1), 1);
assert("limit=3 取 3 字符",  charIndexForByteLimit("hello", 0, 3), 3);
assert("limit=5 取全部",     charIndexForByteLimit("hello", 0, 5), 5);
// Non-zero start: 2 bytes from index 2 are expected to end at index 4.
assert("start=2,limit=2",   charIndexForByteLimit("hello", 2, 2), 4);

// ── 3. CJK (3 bytes per character) ───────────────────────────────────────────
console.log("3. CJK 字符(3 字节/字)");
// Four BMP ideographs (U+4F60 U+597D U+4E16 U+754C): 4 UTF-16 units, 12 UTF-8 bytes.
// Written as escapes so the fixture's byte length does not depend on how this
// file itself is encoded or re-encoded.
const cjk = "\u4F60\u597D\u4E16\u754C";
assert("limit=3  → 1 字符",  charIndexForByteLimit(cjk, 0, 3),  1);
// 4 bytes hold one whole character plus a fragment; the expected index stays at 1.
assert("limit=4  → 1 字符(中间切不开)", charIndexForByteLimit(cjk, 0, 4), 1);
assert("limit=6  → 2 字符",  charIndexForByteLimit(cjk, 0, 6),  2);
// 11 bytes is one short of the full 12, so only three characters are expected.
assert("limit=11 → 3 字符",  charIndexForByteLimit(cjk, 0, 11), 3);
assert("limit=12 → 4 字符",  charIndexForByteLimit(cjk, 0, 12), 4);
// Starting at index 1, one 3-byte character is expected to end at index 2.
assert("start=1,limit=3 → idx=2", charIndexForByteLimit(cjk, 1, 3), 2);

// ── 4. Emoji (4 UTF-8 bytes each; a surrogate pair, so JS length 2) ──────────
console.log("4. Emoji(4 字节/字,JS charLen=2)");
// Three astral code points (U+1F600 U+1F389 U+1F680): 6 UTF-16 units, 12 UTF-8 bytes.
// Written as escapes so the fixture's byte length does not depend on how this
// file itself is encoded or re-encoded.
const emoji = "\u{1F600}\u{1F389}\u{1F680}";
// Expected indices are UTF-16 offsets, so each whole emoji advances the index by 2.
assert("emoji limit=4  → idx=2(1 emoji)", charIndexForByteLimit(emoji, 0, 4),  2);
// 5 bytes hold one emoji plus a fragment; the expected index stays at 2.
assert("emoji limit=5  → idx=2(切不开)",  charIndexForByteLimit(emoji, 0, 5),  2);
assert("emoji limit=8  → idx=4(2 emoji)", charIndexForByteLimit(emoji, 0, 8),  4);
assert("emoji limit=12 → idx=6(全部)",    charIndexForByteLimit(emoji, 0, 12), 6);
// Starting at the second emoji (index 2), 4 bytes are expected to end at index 4.
assert("emoji start=2,limit=4 → idx=4",   charIndexForByteLimit(emoji, 2, 4),  4);

// ── 5. Mixed ASCII + CJK + Emoji ─────────────────────────────────────────────
console.log("5. 混合字符");
// "A" + U+597D + U+1F600 = 1 + 3 + 4 = 8 UTF-8 bytes, JS length 4.
// The non-ASCII part is written as escapes so the fixture's byte length does
// not depend on how this file itself is encoded or re-encoded.
const mixed = "A\u597D\u{1F600}";
assert("混合 limit=1 → 1(A)",       charIndexForByteLimit(mixed, 0, 1), 1);
// 3 bytes: "A" uses 1, the remaining 2 cannot hold the 3-byte ideograph.
assert("混合 limit=3 → 1(A不够好)", charIndexForByteLimit(mixed, 0, 3), 1);
assert("混合 limit=4 → 2(A好)",     charIndexForByteLimit(mixed, 0, 4), 2);
// 7 bytes: 4 are used, the remaining 3 cannot hold the 4-byte emoji.
assert("混合 limit=7 → 2(不够emoji)",charIndexForByteLimit(mixed, 0, 7), 2);
assert("混合 limit=8 → 4(全部)",    charIndexForByteLimit(mixed, 0, 8), 4);
// Starting after "A", 3 bytes are expected to cover exactly the ideograph.
assert("混合 start=1,limit=3 → 2(好)", charIndexForByteLimit(mixed, 1, 3), 2);

// ── 6. start beyond the end of the text ──────────────────────────────────────
console.log("6. start >= text.length");
{
    // "abc" has length 3, so start=5 is already out of range; the case expects 5 back.
    const pastEndIdx = charIndexForByteLimit("abc", 5, 10);
    assert("start=5 on 'abc' → 5", pastEndIdx, 5);
}

// ── 7. Newlines (ASCII, 1 byte each) ─────────────────────────────────────────
console.log("7. 含换行符");
// "\n" is one byte, so the expected index advances one per character here too.
const nl = "a\nb\n";
assert("换行 limit=2 → 2",  charIndexForByteLimit(nl, 0, 2), 2);
assert("换行 limit=3 → 3",  charIndexForByteLimit(nl, 0, 3), 3);

// ── Result summary ───────────────────────────────────────────────────────────
// Print the pass/fail totals; the leading newline separates them from the case output.
console.log(`\n结果: ${passed} 通过 / ${failed} 失败`);
// Exit with a non-zero status when any case failed.
if (failed > 0) process.exit(1);