File size: 1,070 Bytes
/**
* 本地测试 splitTextToChunks
* 运行: cd client/src && npx tsx ts/utils/semanticUtils.chunk.test.ts [文件路径] [bytesPerChunk]
* 示例: npx tsx ts/utils/semanticUtils.chunk.test.ts chunk_test1.txt 512
*/
import * as fs from "fs";
import * as path from "path";
import { splitTextToChunks } from "./semanticUtils";
// Manual driver for splitTextToChunks: reads a text file, splits it, prints
// every chunk with its UTF-8 byte length, and reports whether concatenating
// the chunks reproduces the input.
//
// CLI arguments:
//   argv[2]  path of the file to split (defaults to chunk_test1.txt next to this script)
//   argv[3]  value passed to splitTextToChunks as its second argument (defaults to 800)
//
// Exit status: non-zero when argv[3] is invalid or the round-trip check fails.

const defaultFile = path.resolve(__dirname, "./chunk_test1.txt");
const testFile = path.resolve(process.argv[2] || defaultFile);

const BYTES_PER_CHUNK = Number.parseInt(process.argv[3] || "800", 10);
if (!Number.isInteger(BYTES_PER_CHUNK) || BYTES_PER_CHUNK <= 0) {
  // Fail fast: without this guard a NaN or non-positive size would be handed
  // straight to the splitter.
  throw new Error(
    `bytesPerChunk must be a positive integer, got: ${process.argv[3]}`
  );
}

const text = fs.readFileSync(testFile, "utf-8");
const chunks = splitTextToChunks(text, BYTES_PER_CHUNK);

// Round-trip check: joining every chunk's text should give back the input.
const merged = chunks.map((c) => c.text).join("");
const isRoundTripExact = merged === text;

console.log(`文件: ${testFile}`);
console.log(`bytesPerChunk: ${BYTES_PER_CHUNK}`);
console.log(`chunk 数量: ${chunks.length}`);
console.log(`合并与原文一致: ${isRoundTripExact}`);

chunks.forEach((c, i) => {
  // Report the encoded size in bytes rather than the UTF-16 string length.
  const bytes = Buffer.byteLength(c.text, "utf8");
  console.log(`---[${i}] ${bytes} bytes----------------------------------------------`);
  console.log(c.text);
});

if (!isRoundTripExact) {
  // Let a failed round-trip surface to shells/CI instead of exiting with 0.
  process.exitCode = 1;
}
|