blanchon committed on
Commit
a79c533
·
1 Parent(s): 6090044

Dedupe sharded manifest/rounds rows on overlap

Browse files

Re-rendered batches occasionally produce a second shard whose rows
collide on (match_id, map_name[, round]), which caused Svelte's
each_key_duplicate error on the homepage grid. Keep only the row with
the freshest uploaded_at per key.

Files changed (1) hide show
  1. src/lib/api/hf.ts +16 -2
src/lib/api/hf.ts CHANGED
@@ -27,7 +27,9 @@ async function listIndexShards(
27
  });
28
  }
29
 
30
- async function loadShardedIndex<T>(
 
 
31
  prefix: 'manifest' | 'rounds',
32
  opts: FetchOptions
33
  ): Promise<T[]> {
@@ -36,7 +38,19 @@ async function loadShardedIndex<T>(
36
  const shards = await Promise.all(
37
  paths.map((p) => fetchParquetRows<T>(resolveUrl(p), opts))
38
  );
39
- return shards.flat();
 
 
 
 
 
 
 
 
 
 
 
 
40
  }
41
 
42
  export function listMatches(opts: FetchOptions = {}): Promise<Match[]> {
 
27
  });
28
  }
29
 
30
+ type IndexRow = { match_id: number; map_name: string; uploaded_at: string; round?: number };
31
+
32
+ async function loadShardedIndex<T extends IndexRow>(
33
  prefix: 'manifest' | 'rounds',
34
  opts: FetchOptions
35
  ): Promise<T[]> {
 
38
  const shards = await Promise.all(
39
  paths.map((p) => fetchParquetRows<T>(resolveUrl(p), opts))
40
  );
41
+ // A re-rendered batch produces a second shard whose rows collide with the
42
+ // original on (match,map[,round]). Keep the freshest copy per key.
43
+ const keyOf =
44
+ prefix === 'manifest'
45
+ ? (r: T) => `${r.match_id}|${r.map_name}`
46
+ : (r: T) => `${r.match_id}|${r.map_name}|${r.round}`;
47
+ const latest = new Map<string, T>();
48
+ for (const r of shards.flat()) {
49
+ const k = keyOf(r);
50
+ const cur = latest.get(k);
51
+ if (!cur || r.uploaded_at > cur.uploaded_at) latest.set(k, r);
52
+ }
53
+ return Array.from(latest.values());
54
  }
55
 
56
  export function listMatches(opts: FetchOptions = {}): Promise<Match[]> {