import { parquetReadObjects } from 'hyparquet';
import { compressors } from 'hyparquet-compressors';

/** Optional settings for {@link fetchParquetRows}. */
export type ParquetFetchOptions = {
  /** Replacement for the global `fetch`; used for the HTTP request when provided. */
  fetch?: typeof fetch;
  /** Passed through to the fetch call as its `signal` option. */
  signal?: AbortSignal;
};

/**
 * Normalize a single column value into the shape the rest of the app expects:
 * `Date` becomes an ISO-8601 string, `bigint` becomes a `number`, and every
 * other value (including `null` / `undefined`) is returned unchanged.
 */
function normalizeValue(value: unknown): unknown {
  if (value instanceof Date) return value.toISOString();
  // NOTE: `Number(bigint)` silently loses precision above Number.MAX_SAFE_INTEGER;
  // this relies on every numeric column we read fitting in a JS number.
  if (typeof value === 'bigint') return Number(value);
  return value;
}

/**
 * Fetch a parquet file over HTTP and return its rows as plain JS objects.
 * Backed by `hyparquet` (pure-JS, ~30KB) instead of `parquet-wasm` (6.3MB
 * WASM + init) — the index parquets are tiny (~1–2KB) so a JS reader is
 * dramatically faster end-to-end despite being slower per-byte.
 *
 * Timestamp columns surface as `Date` from hyparquet; we normalize them to
 * ISO strings here so the rest of the app sees a stable shape. BigInts are
 * downcast to numbers (every numeric column we read fits in a JS number).
 * Only the top-level value of each column is normalized; any other value is
 * copied through as-is.
 *
 * @typeParam T - Row shape the caller expects. This is an unchecked assertion:
 *   the rows are not validated against `T` at runtime.
 * @param url - Location of the parquet file.
 * @param opts - Optional `fetch` override and abort `signal`.
 * @returns One object per row, keyed by column name.
 * @throws Error when the HTTP response status is not OK; the message includes
 *   the URL, status code and status text.
 */
export async function fetchParquetRows<T = Record<string, unknown>>(
  url: string,
  opts: ParquetFetchOptions = {}
): Promise<T[]> {
  const f = opts.fetch ?? fetch;
  const res = await f(url, { signal: opts.signal });
  if (!res.ok) throw new Error(`parquet fetch ${url}: ${res.status} ${res.statusText}`);

  // The whole file is downloaded up front, then exposed to the reader through
  // a minimal `byteLength` + `slice` object backed by that in-memory buffer.
  const buffer = await res.arrayBuffer();
  const file = {
    byteLength: buffer.byteLength,
    slice(start: number, end?: number): Promise<ArrayBuffer> {
      return Promise.resolve(buffer.slice(start, end));
    }
  };

  const rows = await parquetReadObjects({ file, compressors });
  return rows.map((row) => {
    const out: Record<string, unknown> = {};
    for (const [k, v] of Object.entries(row)) out[k] = normalizeValue(v);
    return out;
  }) as T[];
}