Spaces:
Running
Running
Add hyparquet-compressors so ZSTD parquets actually parse
Browse files
The dataset's parquets are ZSTD-compressed, and hyparquet's core only ships
support for the trivially-decodable codecs. Pulled in `hyparquet-compressors`
(brotli/snappy/lz4/zstd/...) and passed its `compressors` map into
`parquetReadObjects`. Adds ~100KB gzipped — still vastly under the 6.3MB
parquet-wasm blob it replaced.
- bun.lock +7 -0
- package.json +1 -0
- src/lib/api/parquet.ts +2 -1
bun.lock
CHANGED
|
@@ -6,6 +6,7 @@
|
|
| 6 |
"name": "app",
|
| 7 |
"dependencies": {
|
| 8 |
"hyparquet": "^1.25.6",
|
|
|
|
| 9 |
"mediabunny": "^1.42.0",
|
| 10 |
},
|
| 11 |
"devDependencies": {
|
|
@@ -322,10 +323,16 @@
|
|
| 322 |
|
| 323 |
"fsevents": ["fsevents@2.3.3", "", { "os": "darwin" }, "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw=="],
|
| 324 |
|
|
|
|
|
|
|
| 325 |
"graceful-fs": ["graceful-fs@4.2.11", "", {}, "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ=="],
|
| 326 |
|
| 327 |
"hyparquet": ["hyparquet@1.25.6", "", {}, "sha512-Q9W5IjkVch3ZMnYd4qFv2q8suu5Jc36yt7J+zUNM9grwnP1S189icp0jdEQKM5HJvQkTVy8NMiQ8n/dM5QAt1A=="],
|
| 328 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 329 |
"iconv-lite": ["iconv-lite@0.6.3", "", { "dependencies": { "safer-buffer": "2.1.2" } }, "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw=="],
|
| 330 |
|
| 331 |
"inline-style-parser": ["inline-style-parser@0.2.7", "", {}, "sha512-Nb2ctOyNR8DqQoR0OwRG95uNWIC0C1lCgf5Naz5H6Ji72KZ8OcFZLz2P5sNgwlyoJ8Yif11oMuYs5pBQa86csA=="],
|
|
|
|
| 6 |
"name": "app",
|
| 7 |
"dependencies": {
|
| 8 |
"hyparquet": "^1.25.6",
|
| 9 |
+
"hyparquet-compressors": "^1.1.1",
|
| 10 |
"mediabunny": "^1.42.0",
|
| 11 |
},
|
| 12 |
"devDependencies": {
|
|
|
|
| 323 |
|
| 324 |
"fsevents": ["fsevents@2.3.3", "", { "os": "darwin" }, "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw=="],
|
| 325 |
|
| 326 |
+
"fzstd": ["fzstd@0.1.1", "", {}, "sha512-dkuVSOKKwh3eas5VkJy1AW1vFpet8TA/fGmVA5krThl8YcOVE/8ZIoEA1+U1vEn5ckxxhLirSdY837azmbaNHA=="],
|
| 327 |
+
|
| 328 |
"graceful-fs": ["graceful-fs@4.2.11", "", {}, "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ=="],
|
| 329 |
|
| 330 |
"hyparquet": ["hyparquet@1.25.6", "", {}, "sha512-Q9W5IjkVch3ZMnYd4qFv2q8suu5Jc36yt7J+zUNM9grwnP1S189icp0jdEQKM5HJvQkTVy8NMiQ8n/dM5QAt1A=="],
|
| 331 |
|
| 332 |
+
"hyparquet-compressors": ["hyparquet-compressors@1.1.1", "", { "dependencies": { "fzstd": "0.1.1", "hysnappy": "1.0.0" } }, "sha512-yx7aA3Rhj0YycbdV71+XznQSLAefa4cT0urpgNXy4aM6eSeCknaVDNne8y45Uz74Fb15yyXUzOStlceOJBan7A=="],
|
| 333 |
+
|
| 334 |
+
"hysnappy": ["hysnappy@1.0.0", "", {}, "sha512-MNrC4NfwDGPb889O6gIfEtbvEZCSWUsSEhsz4Oq2FRcpGtXHfeVz3KciSPp5Pnnz1NjFMgDQNfxdJozymJEDDA=="],
|
| 335 |
+
|
| 336 |
"iconv-lite": ["iconv-lite@0.6.3", "", { "dependencies": { "safer-buffer": "2.1.2" } }, "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw=="],
|
| 337 |
|
| 338 |
"inline-style-parser": ["inline-style-parser@0.2.7", "", {}, "sha512-Nb2ctOyNR8DqQoR0OwRG95uNWIC0C1lCgf5Naz5H6Ji72KZ8OcFZLz2P5sNgwlyoJ8Yif11oMuYs5pBQa86csA=="],
|
package.json
CHANGED
|
@@ -42,6 +42,7 @@
|
|
| 42 |
},
|
| 43 |
"dependencies": {
|
| 44 |
"hyparquet": "^1.25.6",
|
|
|
|
| 45 |
"mediabunny": "^1.42.0"
|
| 46 |
}
|
| 47 |
}
|
|
|
|
| 42 |
},
|
| 43 |
"dependencies": {
|
| 44 |
"hyparquet": "^1.25.6",
|
| 45 |
+
"hyparquet-compressors": "^1.1.1",
|
| 46 |
"mediabunny": "^1.42.0"
|
| 47 |
}
|
| 48 |
}
|
src/lib/api/parquet.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import { parquetReadObjects } from 'hyparquet';
|
|
|
|
| 2 |
|
| 3 |
export type ParquetFetchOptions = {
|
| 4 |
fetch?: typeof fetch;
|
|
@@ -30,7 +31,7 @@ export async function fetchParquetRows<T = Record<string, unknown>>(
|
|
| 30 |
}
|
| 31 |
};
|
| 32 |
|
| 33 |
-
const rows = await parquetReadObjects({ file });
|
| 34 |
|
| 35 |
return rows.map((row) => {
|
| 36 |
const out: Record<string, unknown> = {};
|
|
|
|
| 1 |
import { parquetReadObjects } from 'hyparquet';
|
| 2 |
+
import { compressors } from 'hyparquet-compressors';
|
| 3 |
|
| 4 |
export type ParquetFetchOptions = {
|
| 5 |
fetch?: typeof fetch;
|
|
|
|
| 31 |
}
|
| 32 |
};
|
| 33 |
|
| 34 |
+
const rows = await parquetReadObjects({ file, compressors });
|
| 35 |
|
| 36 |
return rows.map((row) => {
|
| 37 |
const out: Record<string, unknown> = {};
|