blanchon committed on
Commit
3f5bd49
·
1 Parent(s): 7f23fd2

Hub API: read compacted web-index parquets

Browse files

Home page: 454 reqs (1 tree + 226 manifest + 227 rounds shards) → 2 reqs
(index/web/maps.parquet + index/web/rounds.parquet).
Match page: 10 per-player chunks-preview shards → 1 per-(match,map)
chunks-preview.parquet.

Drops the now-unused listTree helper and PLAYER_COUNT constant.

Files changed (3) hide show
  1. src/lib/api/hf.ts +35 -79
  2. src/lib/api/hub.ts +0 -11
  3. src/lib/types.ts +0 -1
src/lib/api/hf.ts CHANGED
@@ -1,5 +1,5 @@
1
- import { PLAYER_COUNT, type Match, type PreviewChunk, type Round } from '$lib/types';
2
- import { listTree, resolveUrl, type FetchOpts } from '$lib/api/hub';
3
  import { fetchParquetRows } from '$lib/api/parquet';
4
 
5
  export type FetchOptions = FetchOpts;
@@ -8,50 +8,16 @@ let matchesPromise: Promise<Match[]> | null = null;
8
  let roundsPromise: Promise<Round[]> | null = null;
9
  const matchPreviewsCache = new Map<string, Promise<PreviewChunk[]>>();
10
 
11
- // The dataset is sharded by upload batch. Index files are named like
12
- // `index/manifest-<shard_id>.parquet` / `index/rounds-<shard_id>.parquet`.
13
- // Discover them at runtime so new shards don't require a code change.
14
- async function listIndexShards(
15
- prefix: 'manifest' | 'rounds',
16
- opts: FetchOptions
17
- ): Promise<string[]> {
18
- const entries = await listTree('index', opts);
19
- return entries
20
- .filter((e) => e.type === 'file')
21
- .map((e) => e.path)
22
- .filter((p) => {
23
- const name = p.split('/').pop() ?? '';
24
- return name.startsWith(`${prefix}-`) && name.endsWith('.parquet');
25
- });
26
- }
27
-
28
- type IndexRow = { match_id: number; map_name: string; uploaded_at: string; round?: number };
29
-
30
- async function loadShardedIndex<T extends IndexRow>(
31
- prefix: 'manifest' | 'rounds',
32
- opts: FetchOptions
33
- ): Promise<T[]> {
34
- const paths = await listIndexShards(prefix, opts);
35
- if (!paths.length) return [];
36
- const shards = await Promise.all(paths.map((p) => fetchParquetRows<T>(resolveUrl(p), opts)));
37
- // A re-rendered batch produces a second shard whose rows collide with the
38
- // original on (match,map[,round]). Keep the freshest copy per key.
39
- const keyOf =
40
- prefix === 'manifest'
41
- ? (r: T) => `${r.match_id}|${r.map_name}`
42
- : (r: T) => `${r.match_id}|${r.map_name}|${r.round}`;
43
- const latest = new Map<string, T>();
44
- for (const r of shards.flat()) {
45
- const k = keyOf(r);
46
- const cur = latest.get(k);
47
- if (!cur || r.uploaded_at > cur.uploaded_at) latest.set(k, r);
48
- }
49
- return Array.from(latest.values());
50
- }
51
 
52
  export function listMatches(opts: FetchOptions = {}): Promise<Match[]> {
53
  if (matchesPromise) return matchesPromise;
54
- matchesPromise = loadShardedIndex<Match>('manifest', opts)
55
  .then((rows) => {
56
  rows.sort(
57
  (a, b) =>
@@ -67,13 +33,12 @@ export function listMatches(opts: FetchOptions = {}): Promise<Match[]> {
67
  return matchesPromise;
68
  }
69
 
70
- export async function listAllRounds(opts: FetchOptions = {}): Promise<Round[]> {
71
- if (!roundsPromise) {
72
- roundsPromise = loadShardedIndex<Round>('rounds', opts).catch((err) => {
73
- roundsPromise = null;
74
- throw err;
75
- });
76
- }
77
  return roundsPromise;
78
  }
79
 
@@ -89,9 +54,9 @@ export async function listRounds(
89
  }
90
 
91
  /**
92
- * Fetch all preview rows for every player on this (match, map) in parallel.
93
- * The shard_id from the match's manifest row is enough to construct each
94
- * player's parquet URL — no tree-API discovery needed.
95
  */
96
  async function loadMatchPreviews(
97
  matchId: number,
@@ -102,32 +67,23 @@ async function loadMatchPreviews(
102
  const cached = matchPreviewsCache.get(key);
103
  if (cached) return cached;
104
 
105
- const promise = (async () => {
106
- const matches = await listMatches(opts);
107
- const match = matches.find((m) => m.match_id === matchId && m.map_name === mapName);
108
- if (!match) return [];
109
-
110
- const players = Array.from({ length: PLAYER_COUNT }, (_, i) => i);
111
- const results = await Promise.all(
112
- players.map(async (player) => {
113
- const dir = `data/match_id=${matchId}/map_name=${mapName}/player=${player}`;
114
- try {
115
- const rows = await fetchParquetRows<PreviewChunk>(
116
- resolveUrl(`${dir}/chunks-preview-${match.shard_id}.parquet`),
117
- opts
118
- );
119
- for (const r of rows) r.preview_video = { src: resolveUrl(`${dir}/${r.preview_path}`) };
120
- return rows;
121
- } catch {
122
- return [] as PreviewChunk[];
123
- }
124
- })
125
- );
126
- return results.flat();
127
- })().catch((err) => {
128
- matchPreviewsCache.delete(key);
129
- throw err;
130
- });
131
 
132
  matchPreviewsCache.set(key, promise);
133
  return promise;
 
1
+ import type { Match, PreviewChunk, Round } from '$lib/types';
2
+ import { resolveUrl, type FetchOpts } from '$lib/api/hub';
3
  import { fetchParquetRows } from '$lib/api/parquet';
4
 
5
  export type FetchOptions = FetchOpts;
 
8
  let roundsPromise: Promise<Round[]> | null = null;
9
  const matchPreviewsCache = new Map<string, Promise<PreviewChunk[]>>();
10
 
11
+ // Compacted web-tier index. Single deduped parquet per scope, written by the
12
+ // ingestion pipeline after each upload batch.
13
+ const WEB_MAPS = 'index/web/maps.parquet';
14
+ const WEB_ROUNDS = 'index/web/rounds.parquet';
15
+ const matchPreviewsPath = (matchId: number, mapName: string) =>
16
+ `data/match_id=${matchId}/map_name=${mapName}/chunks-preview.parquet`;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  export function listMatches(opts: FetchOptions = {}): Promise<Match[]> {
19
  if (matchesPromise) return matchesPromise;
20
+ matchesPromise = fetchParquetRows<Match>(resolveUrl(WEB_MAPS), opts)
21
  .then((rows) => {
22
  rows.sort(
23
  (a, b) =>
 
33
  return matchesPromise;
34
  }
35
 
36
+ export function listAllRounds(opts: FetchOptions = {}): Promise<Round[]> {
37
+ if (roundsPromise) return roundsPromise;
38
+ roundsPromise = fetchParquetRows<Round>(resolveUrl(WEB_ROUNDS), opts).catch((err) => {
39
+ roundsPromise = null;
40
+ throw err;
41
+ });
 
42
  return roundsPromise;
43
  }
44
 
 
54
  }
55
 
56
  /**
57
+ * Single deduped per-(match, map) parquet with every player's preview rows.
58
+ * Each row's `preview_path` is relative to its own `player=N` dir, so the
59
+ * absolute URL is rebuilt using `r.player`.
60
  */
61
  async function loadMatchPreviews(
62
  matchId: number,
 
67
  const cached = matchPreviewsCache.get(key);
68
  if (cached) return cached;
69
 
70
+ const baseDir = `data/match_id=${matchId}/map_name=${mapName}`;
71
+ const promise = fetchParquetRows<PreviewChunk>(
72
+ resolveUrl(matchPreviewsPath(matchId, mapName)),
73
+ opts
74
+ )
75
+ .then((rows) => {
76
+ for (const r of rows) {
77
+ r.preview_video = {
78
+ src: resolveUrl(`${baseDir}/player=${r.player}/${r.preview_path}`)
79
+ };
80
+ }
81
+ return rows;
82
+ })
83
+ .catch((err) => {
84
+ matchPreviewsCache.delete(key);
85
+ throw err;
86
+ });
 
 
 
 
 
 
 
 
 
87
 
88
  matchPreviewsCache.set(key, promise);
89
  return promise;
src/lib/api/hub.ts CHANGED
@@ -8,14 +8,3 @@ export type FetchOpts = { fetch?: typeof fetch; signal?: AbortSignal };
8
  export function resolveUrl(repoPath: string): string {
9
  return `https://huggingface.co/datasets/${DATASET}/resolve/${REF}/${repoPath}`;
10
  }
11
-
12
- export type TreeEntry = { type: 'file' | 'directory'; path: string; size?: number };
13
-
14
- /** List entries under `<dataset>/<dirPath>` via the HF tree API. */
15
- export async function listTree(dirPath: string, opts: FetchOpts = {}): Promise<TreeEntry[]> {
16
- const fetchFn = opts.fetch ?? fetch;
17
- const url = `https://huggingface.co/api/datasets/${DATASET}/tree/${REF}/${dirPath}`;
18
- const r = await fetchFn(url, { signal: opts.signal });
19
- if (!r.ok) throw new Error(`tree ${dirPath}: ${r.status}`);
20
- return (await r.json()) as TreeEntry[];
21
- }
 
8
  export function resolveUrl(repoPath: string): string {
9
  return `https://huggingface.co/datasets/${DATASET}/resolve/${REF}/${repoPath}`;
10
  }
 
 
 
 
 
 
 
 
 
 
 
src/lib/types.ts CHANGED
@@ -1,5 +1,4 @@
1
  export const DATASET = 'blanchon/cs2_dataset_render';
2
- export const PLAYER_COUNT = 10;
3
 
4
  export type Winner = 'team1' | 'team2';
5
  export type WinnerSide = 'ct' | 't' | null;
 
1
  export const DATASET = 'blanchon/cs2_dataset_render';
 
2
 
3
  export type Winner = 'team1' | 'team2';
4
  export type WinnerSide = 'ct' | 't' | null;