| #!/usr/bin/env node |
| import fs from 'fs/promises' |
| import path from 'path' |
|
|
/**
 * Print the command-line help text to stdout.
 *
 * The text is emitted with a single console.log call and ends with a
 * trailing newline (in addition to the one console.log appends).
 */
function usage() {
  const helpLines = [
    'Usage:',
    'node scripts/count_column.js --file <file.csv> --column <columnName|index> [--out <out.txt>]',
    '',
    'Options:',
    '--file CSV 文件路径(必需)。',
    '--column 指定列名或列序号(1 表示第一列)。如果为数字则按 1-based 处理。',
    '--out 可选,将统计结果写入指定文件(默认:打印到控制台)。',
    '--help 显示帮助。',
    '', // keeps the trailing newline of the original help text
  ]
  console.log(helpLines.join('\n'))
}
|
|
/**
 * Parse command-line arguments into an options object.
 *
 * Recognised forms:
 *   --file <v> | --column <v> | --out <v>   (value in the next token)
 *   --file=<v> | --column=<v> | --out=<v>   (value in the same token)
 *   --help | -h                             (stops parsing immediately)
 * The first bare token (one that does not start with `--`) is used as the
 * file path when no file has been set yet.
 *
 * @param {string[]} argv - Full argv array; the first two entries
 *   (runtime and script path) are skipped.
 * @returns {{file: ?string, column: ?string, out: ?string, help: boolean}}
 *   Options that were not supplied (or were supplied without a value)
 *   stay `null`.
 */
function parseArgs(argv) {
  const valueFlags = new Map([
    ['--file', 'file'],
    ['--column', 'column'],
    ['--out', 'out'],
  ])
  const isHelpFlag = (tok) => tok === '--help' || tok === '-h'
  const isKnownFlag = (tok) => isHelpFlag(tok) || valueFlags.has(tok)

  const args = argv.slice(2)
  const opts = { file: null, column: null, out: null, help: false }

  for (let i = 0; i < args.length; i++) {
    const token = args[i]
    if (isHelpFlag(token)) { opts.help = true; break }

    // `--name=value` form: only split when the part before `=` is a known flag,
    // so positional paths that merely contain `=` are left alone.
    const eqPos = token.indexOf('=')
    if (eqPos > 0 && valueFlags.has(token.slice(0, eqPos))) {
      opts[valueFlags.get(token.slice(0, eqPos))] = token.slice(eqPos + 1)
      continue
    }

    if (valueFlags.has(token)) {
      const next = args[i + 1]
      // Consume the next token only when it is a real value; a trailing flag
      // or a flag followed by another flag leaves the option as null.
      if (next !== undefined && !isKnownFlag(next)) {
        opts[valueFlags.get(token)] = next
        i++
      }
      continue
    }

    // Unknown options must not be mistaken for the positional file path.
    if (token.startsWith('--')) continue

    if (!opts.file) opts.file = token
  }
  return opts
}
|
|
| |
/**
 * Split one CSV record (a single line, no embedded newlines) into fields.
 *
 * Rules implemented:
 *   - a comma outside quotes ends the current field;
 *   - a double quote toggles quoted mode and is not part of the value;
 *   - inside quotes, two consecutive double quotes produce one literal quote.
 * An unterminated quote simply runs to the end of the line.
 *
 * @param {string} line - One line of CSV text.
 * @returns {string[]} The field values; always at least one element.
 */
function parseCSVLine(line) {
  const fields = []
  let field = ''
  let quoted = false
  let pos = 0

  while (pos < line.length) {
    const c = line[pos]
    if (c === '"') {
      if (quoted && line[pos + 1] === '"') {
        // Escaped quote ("") inside a quoted field: keep one, skip both.
        field += '"'
        pos += 2
        continue
      }
      quoted = !quoted
    } else if (c === ',' && !quoted) {
      fields.push(field)
      field = ''
    } else {
      field += c
    }
    pos += 1
  }

  // The last field has no terminating comma.
  fields.push(field)
  return fields
}
|
|
/**
 * Read a CSV file and split it into a header row and data rows.
 *
 * The file is decoded as UTF-8. A leading byte-order mark is removed so it
 * does not become part of the first header name. Blank (whitespace-only)
 * lines are ignored; the first non-blank line is the header.
 *
 * Limitation: the file is split on line breaks before field parsing, so a
 * quoted field containing a newline is not supported.
 *
 * @param {string} filePath - Path of the CSV file to read.
 * @returns {Promise<{headers: string[], rows: string[][]}>} Parsed header
 *   fields and one array of fields per data line; both are empty when the
 *   file has no non-blank line.
 */
async function readCSV(filePath) {
  const raw = await fs.readFile(filePath, 'utf8')
  // The utf8 decoder keeps a BOM as U+FEFF; drop it so header lookup by name works.
  const txt = raw.charCodeAt(0) === 0xfeff ? raw.slice(1) : raw

  const lines = txt.split(/\r?\n/).filter((line) => line.trim() !== '')
  if (lines.length === 0) return { headers: [], rows: [] }

  const [headerLine, ...dataLines] = lines
  return {
    headers: parseCSVLine(headerLine),
    rows: dataLines.map((line) => parseCSVLine(line)),
  }
}
|
|
/**
 * Turn a cell value into the key used for counting.
 *
 * @param {*} k - Raw cell value; may be null or undefined.
 * @returns {string} Empty string for null/undefined, otherwise the value
 *   converted to a string with surrounding whitespace removed.
 */
function normalizeKey(k) {
  return k == null ? '' : String(k).trim()
}
|
|
/**
 * Quote a value for CSV output when needed.
 *
 * @param {*} value - Value to emit as one CSV field.
 * @returns {string} The value as text; wrapped in double quotes (with inner
 *   quotes doubled) when it contains a comma, a double quote or a line break.
 */
function escapeCsvField(value) {
  const text = String(value)
  return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text
}

/**
 * CLI entry point: count how often each distinct value occurs in one column
 * of a CSV file and print the result or write it to a file.
 *
 * Steps: parse argv, validate the input path, read the CSV, resolve the
 * requested column (1-based index when the argument is all digits, otherwise
 * a header name), count trimmed values, sort by descending count then by
 * value, and emit a small report followed by `Value,Count` lines.
 *
 * Exit codes: 1 missing arguments, 2 file cannot be stat'ed, 3 path is not a
 * regular file, 4 no header row, 5 column index out of range, 6 column name
 * not found.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const opts = parseArgs(process.argv)
  if (opts.help) { usage(); return }
  if (!opts.file || !opts.column) {
    console.error('Missing --file or --column')
    usage()
    process.exit(1)
  }

  const filePath = path.isAbsolute(opts.file) ? opts.file : path.join(process.cwd(), opts.file)
  let stat
  try {
    stat = await fs.stat(filePath)
  } catch (e) {
    console.error('File not found:', filePath)
    process.exit(2)
  }
  if (!stat.isFile()) { console.error('Not a file:', filePath); process.exit(3) }

  const { headers, rows } = await readCSV(filePath)
  if (!headers || headers.length === 0) { console.error('No header found in CSV'); process.exit(4) }

  let colIndex = -1
  if (/^\d+$/.test(opts.column)) {
    // All-digit argument: a 1-based column number.
    colIndex = Number.parseInt(opts.column, 10) - 1
    if (colIndex < 0 || colIndex >= headers.length) { console.error('Column index out of range'); process.exit(5) }
  } else {
    // Try progressively looser matches so a stricter match always wins:
    // exact, then case-insensitive, then case-insensitive ignoring the
    // surrounding whitespace that normalizeKey strips.
    const lower = opts.column.toLowerCase()
    const loose = normalizeKey(opts.column).toLowerCase()
    const matchers = [
      (h) => h === opts.column,
      (h) => String(h).toLowerCase() === lower,
      (h) => normalizeKey(h).toLowerCase() === loose,
    ]
    for (const matches of matchers) {
      colIndex = headers.findIndex(matches)
      if (colIndex !== -1) break
    }
    if (colIndex === -1) { console.error(`Column name not found: ${opts.column}`); process.exit(6) }
  }

  // Rows shorter than the header contribute an empty-string key.
  const counts = new Map()
  for (const vals of rows) {
    const key = normalizeKey(vals[colIndex])
    counts.set(key, (counts.get(key) || 0) + 1)
  }

  // Most frequent first; ties broken by value for a stable, readable order.
  const items = Array.from(counts.entries()).sort((a, b) => {
    if (b[1] !== a[1]) return b[1] - a[1]
    return String(a[0]).localeCompare(String(b[0]))
  })

  const outLines = [
    `File: ${filePath}`,
    `Column: ${headers[colIndex]} (index ${colIndex + 1})`,
    `Total distinct classes: ${items.length}`,
    '',
    'Value,Count',
  ]
  // Values may themselves contain commas or quotes; escape them so the
  // Value,Count section stays parseable as CSV.
  for (const [val, cnt] of items) outLines.push(`${escapeCsvField(val)},${cnt}`)

  if (opts.out) {
    const outPath = path.isAbsolute(opts.out) ? opts.out : path.join(process.cwd(), opts.out)
    await fs.mkdir(path.dirname(outPath), { recursive: true })
    await fs.writeFile(outPath, outLines.join('\n'), 'utf8')
    console.log(`Wrote counts to ${outPath} (${items.length} distinct)`)
  } else {
    console.log(outLines.join('\n'))
  }
}
|
|
// Run the CLI; any uncaught failure is reported (with its stack trace when
// one is available) and the process exits with status 10.
main().catch((failure) => {
  const detail = failure && failure.stack ? failure.stack : failure
  console.error('Error:', detail)
  process.exit(10)
})
|
|