/**
 * Manual smoke test for `splitTextToChunks`.
 *
 * CLI arguments (both optional, positional):
 *   argv[2]  path of the text file to split; defaults to `chunk_test1.txt`
 *            located next to this script.
 *   argv[3]  positive integer handed to `splitTextToChunks` as its second
 *            argument; defaults to 800.
 *
 * Output: the resolved file path, the chunk size, the number of chunks,
 * whether concatenating every chunk's `text` reproduces the input exactly,
 * and then each chunk preceded by a header showing its UTF-8 byte length.
 *
 * The process exit code is set to 1 when the concatenated chunks differ
 * from the input, and the script throws when the chunk size is invalid.
 */
import * as fs from "fs";
import * as path from "path";
import { splitTextToChunks } from "./semanticUtils";

const defaultFile = path.resolve(__dirname, "./chunk_test1.txt");
const testFile = path.resolve(process.argv[2] || defaultFile);

// `||` (not `??`) on purpose: an empty-string argument should also fall back
// to the default instead of parsing to NaN.
const rawBytesPerChunk = process.argv[3] || "800";
const BYTES_PER_CHUNK = Number.parseInt(rawBytesPerChunk, 10);

// Fail fast on a bad size instead of handing NaN / 0 / a negative number to
// the splitter, whose behavior for such values is not visible from here.
if (!Number.isInteger(BYTES_PER_CHUNK) || BYTES_PER_CHUNK <= 0) {
  throw new Error(
    `bytesPerChunk must be a positive integer, got: ${JSON.stringify(rawBytesPerChunk)}`,
  );
}

const text = fs.readFileSync(testFile, "utf-8");

const chunks = splitTextToChunks(text, BYTES_PER_CHUNK);

// Re-assemble the chunks to check that the split neither dropped nor
// altered any part of the input.
const merged = chunks.map((c) => c.text).join("");
const isLossless = merged === text;

console.log(`文件: ${testFile}`);
console.log(`bytesPerChunk: ${BYTES_PER_CHUNK}`);
console.log(`chunk 数量: ${chunks.length}`);
console.log(`合并与原文一致: ${isLossless}`);

chunks.forEach((c, i) => {
  // Report the encoded size in bytes, not the UTF-16 `length` of the string.
  const bytes = Buffer.byteLength(c.text, "utf8");
  console.log(`---[${i}] ${bytes} bytes----------------------------------------------`);
  console.log(c.text);
});

// Surface a failed round-trip to the shell / CI while still having printed
// every chunk above for inspection.
if (!isLossless) {
  process.exitCode = 1;
}