blanchon committed on
Commit
6d55c38
·
1 Parent(s): e355be5

Evaluation: 4-round policy, validations store, prev/next, copy/clear, build flag

Browse files

- Bump policy from 3 → 4 rounds per (match, map): first, last, and 2 PRNG-picked
middle rounds (deterministic on match_id+map). Reviewing all 4 without
flagging any of them implies the (match, map) is good.
- New localStorage store opencs2:eval:validations:v1: clicking Next
validates the current candidate. firstUnreviewed/nextUnreviewed skip
already-flagged or already-validated candidates so a returning reviewer
resumes where they left off (header eval entry jumps to first unreviewed).
- Eval bar adds Previous (←) for backtracking and a "N reviewed" counter.
- Flag dialog gets a "Local review data" footer with live flag/validation
counts, a Copy button (full JSON to clipboard via exportReviews), and a
Trash button to clear both stores. The note field was already free-text.
- New "Don't flag" entry: "Recording ends right at death" — POVs cut on the
exact death tick (intentional, to avoid camera glitches).
- Build flag PUBLIC_DISABLE_EVAL=1 hides the entire eval surface (header
toggle, eval bar, flag dialog) — module stays in bundle but never
renders. Documented in README.

README.md CHANGED
@@ -115,6 +115,10 @@ bun run check # svelte-check
115
 
116
  The repo ships a `Dockerfile` and `serve.ts` that mirror the Hugging Face Space deployment.
117
 
 
 
 
 
118
  ## Viewer internals
119
 
120
  - **`hyparquet` + `hyparquet-compressors`** read the match and round parquet shards directly from `hf://` URLs. No server, no DuckDB, no WASM bundle larger than necessary.
 
115
 
116
  The repo ships a `Dockerfile` and `serve.ts` that mirror the Hugging Face Space deployment.
117
 
118
+ ### Build flags
119
+
120
+ - `PUBLIC_DISABLE_EVAL=1` — hide the evaluation surface (header toggle, eval bar, flag dialog) for public deploys. Set at build time only (`PUBLIC_DISABLE_EVAL=1 bun run build`); the eval module stays in the bundle but is never rendered.
121
+
122
  ## Viewer internals
123
 
124
  - **`hyparquet` + `hyparquet-compressors`** read the match and round parquet shards directly from `hf://` URLs. No server, no DuckDB, no WASM bundle larger than necessary.
src/lib/components/eval-bar.svelte CHANGED
@@ -1,16 +1,21 @@
1
  <script lang="ts">
2
  import { Button } from '$lib/components/ui/button';
3
- import * as DropdownMenu from '$lib/components/ui/dropdown-menu';
4
  import * as Tooltip from '$lib/components/ui/tooltip';
5
  import FlagIcon from 'phosphor-svelte/lib/FlagIcon';
6
- import InfoIcon from 'phosphor-svelte/lib/InfoIcon';
7
  import ArrowRightIcon from 'phosphor-svelte/lib/ArrowRightIcon';
 
8
  import XIcon from 'phosphor-svelte/lib/XIcon';
9
  import { goto } from '$app/navigation';
10
  import { page } from '$app/state';
11
- import { addFlag, FLAG_REASONS, evalUrl, type EvalCandidate, type FlagReason } from '$lib/eval';
 
 
 
 
 
 
12
  import { toast } from 'svelte-sonner';
13
- import EvalInfoDialog from './eval-info-dialog.svelte';
14
 
15
  interface Props {
16
  queue: EvalCandidate[];
@@ -21,26 +26,41 @@
21
  }
22
  let { queue, index, matchId, mapName, round }: Props = $props();
23
 
24
- let infoOpen = $state(false);
 
 
 
25
 
26
- function flag(reason: FlagReason, label: string) {
27
- addFlag({ matchId, mapName, round, reason });
28
- toast.success(`Flagged: ${label}`, {
29
- description: `match ${matchId} · ${mapName} · round ${round}`
30
- });
31
- }
 
32
 
33
  function next() {
34
- const ni = index < 0 ? 0 : index + 1;
35
- if (ni >= queue.length) {
 
 
 
 
 
36
  toast.success('Evaluation complete', {
37
- description: `Reached end of queue (${queue.length} candidates).`
38
  });
39
  return;
40
  }
41
  goto(evalUrl(queue[ni], ni));
42
  }
43
 
 
 
 
 
 
 
44
  function exitEval() {
45
  const url = new URL(page.url);
46
  url.searchParams.delete('eval');
@@ -60,31 +80,10 @@
60
  >Eval</span
61
  >
62
  <span class="text-muted-foreground tabular-nums">{positionLabel}</span>
 
 
63
 
64
  <div class="ml-auto flex items-center gap-1">
65
- <DropdownMenu.Root>
66
- <DropdownMenu.Trigger>
67
- {#snippet child({ props })}
68
- <Button {...props} variant="outline" size="sm">
69
- <FlagIcon size={14} weight="fill" /> Flag
70
- </Button>
71
- {/snippet}
72
- </DropdownMenu.Trigger>
73
- <DropdownMenu.Content align="end" class="w-72">
74
- <DropdownMenu.Label>Flag this candidate as…</DropdownMenu.Label>
75
- <DropdownMenu.Separator />
76
- {#each FLAG_REASONS as r (r.id)}
77
- <DropdownMenu.Item
78
- onclick={() => flag(r.id, r.label)}
79
- class="flex-col items-start gap-0.5"
80
- >
81
- <span class="font-medium">{r.label}</span>
82
- <span class="text-[10px] text-muted-foreground">{r.description}</span>
83
- </DropdownMenu.Item>
84
- {/each}
85
- </DropdownMenu.Content>
86
- </DropdownMenu.Root>
87
-
88
  <Tooltip.Root>
89
  <Tooltip.Trigger>
90
  {#snippet child({ props })}
@@ -92,16 +91,21 @@
92
  {...props}
93
  variant="ghost"
94
  size="icon-sm"
95
- onclick={() => (infoOpen = true)}
96
- aria-label="What to flag"
 
97
  >
98
- <InfoIcon size={14} weight="duotone" />
99
  </Button>
100
  {/snippet}
101
  </Tooltip.Trigger>
102
- <Tooltip.Content side="bottom">How to evaluate</Tooltip.Content>
103
  </Tooltip.Root>
104
 
 
 
 
 
105
  <Button onclick={next} size="sm" disabled={total === 0}>
106
  Next <ArrowRightIcon size={14} weight="bold" />
107
  </Button>
@@ -126,4 +130,11 @@
126
  </div>
127
  </div>
128
 
129
- <EvalInfoDialog bind:open={infoOpen} />
 
 
 
 
 
 
 
 
1
  <script lang="ts">
2
  import { Button } from '$lib/components/ui/button';
 
3
  import * as Tooltip from '$lib/components/ui/tooltip';
4
  import FlagIcon from 'phosphor-svelte/lib/FlagIcon';
 
5
  import ArrowRightIcon from 'phosphor-svelte/lib/ArrowRightIcon';
6
+ import ArrowLeftIcon from 'phosphor-svelte/lib/ArrowLeftIcon';
7
  import XIcon from 'phosphor-svelte/lib/XIcon';
8
  import { goto } from '$app/navigation';
9
  import { page } from '$app/state';
10
+ import {
11
+ addValidation,
12
+ evalUrl,
13
+ nextUnreviewed,
14
+ reviewedKeySet,
15
+ type EvalCandidate
16
+ } from '$lib/eval';
17
  import { toast } from 'svelte-sonner';
18
+ import EvalFlagDialog from './eval-flag-dialog.svelte';
19
 
20
  interface Props {
21
  queue: EvalCandidate[];
 
26
  }
27
  let { queue, index, matchId, mapName, round }: Props = $props();
28
 
29
+ let flagOpen = $state(false);
30
+ // Bumped after each review action so the dialog count and the Next-skip
31
+ // logic re-read localStorage without needing a reactive store.
32
+ let reviewBump = $state(0);
33
 
34
// How many entries of `queue` already carry a flag or a validation.
// `reviewBump` is touched only to register it as a dependency: the reviewed
// set is rebuilt from the stored flags/validations, which are not reactive.
const reviewedCount = $derived.by(() => {
  void reviewBump;
  const seen = reviewedKeySet();
  return queue.filter((c) => seen.has(`${c.matchId}|${c.mapName}|${c.round}`)).length;
});
41
 
42
  function next() {
43
+ // Hitting Next without flagging implicitly validates the current candidate.
44
+ if (round && index >= 0) {
45
+ addValidation({ matchId, mapName, round });
46
+ reviewBump++;
47
+ }
48
+ const ni = nextUnreviewed(queue, index < 0 ? -1 : index, reviewedKeySet());
49
+ if (ni < 0) {
50
  toast.success('Evaluation complete', {
51
+ description: `Reviewed ${reviewedCount + 1} / ${queue.length} candidates.`
52
  });
53
  return;
54
  }
55
  goto(evalUrl(queue[ni], ni));
56
  }
57
 
58
// Step back one position in the queue. Does nothing when `index` is 0 or
// negative, i.e. when there is no earlier queue entry to go to.
function previous() {
  if (index > 0) {
    const target = index - 1;
    goto(evalUrl(queue[target], target));
  }
}
63
+
64
  function exitEval() {
65
  const url = new URL(page.url);
66
  url.searchParams.delete('eval');
 
80
  >Eval</span
81
  >
82
  <span class="text-muted-foreground tabular-nums">{positionLabel}</span>
83
+ <span class="text-muted-foreground/60 tabular-nums">·</span>
84
+ <span class="text-muted-foreground tabular-nums">{reviewedCount} reviewed</span>
85
 
86
  <div class="ml-auto flex items-center gap-1">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  <Tooltip.Root>
88
  <Tooltip.Trigger>
89
  {#snippet child({ props })}
 
91
  {...props}
92
  variant="ghost"
93
  size="icon-sm"
94
+ onclick={previous}
95
+ disabled={index <= 0}
96
+ aria-label="Previous candidate"
97
  >
98
+ <ArrowLeftIcon size={14} weight="bold" />
99
  </Button>
100
  {/snippet}
101
  </Tooltip.Trigger>
102
+ <Tooltip.Content side="bottom">Previous candidate</Tooltip.Content>
103
  </Tooltip.Root>
104
 
105
+ <Button variant="outline" size="sm" onclick={() => (flagOpen = true)}>
106
+ <FlagIcon size={14} weight="fill" /> Flag
107
+ </Button>
108
+
109
  <Button onclick={next} size="sm" disabled={total === 0}>
110
  Next <ArrowRightIcon size={14} weight="bold" />
111
  </Button>
 
130
  </div>
131
  </div>
132
 
133
+ <EvalFlagDialog
134
+ bind:open={flagOpen}
135
+ {matchId}
136
+ {mapName}
137
+ {round}
138
+ queueLength={total}
139
+ onChange={() => reviewBump++}
140
+ />
src/lib/components/eval-flag-dialog.svelte ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <script lang="ts">
2
+ import * as Dialog from '$lib/components/ui/dialog';
3
+ import { Button } from '$lib/components/ui/button';
4
+ import { Label } from '$lib/components/ui/label';
5
+ import {
6
+ addFlag,
7
+ clearFlags,
8
+ clearValidations,
9
+ exportReviews,
10
+ FLAG_REASONS,
11
+ KNOWN_MINOR_ISSUES,
12
+ loadFlags,
13
+ loadValidations,
14
+ type FlagReason
15
+ } from '$lib/eval';
16
+ import ArrowSquareOutIcon from 'phosphor-svelte/lib/ArrowSquareOutIcon';
17
+ import ClipboardIcon from 'phosphor-svelte/lib/ClipboardIcon';
18
+ import TrashIcon from 'phosphor-svelte/lib/TrashIcon';
19
+ import { toast } from 'svelte-sonner';
20
+
21
+ interface Props {
22
+ open?: boolean;
23
+ matchId: number;
24
+ mapName: string;
25
+ round: number;
26
+ queueLength: number;
27
+ onChange?: () => void;
28
+ }
29
+ let { open = $bindable(false), matchId, mapName, round, queueLength, onChange }: Props = $props();
30
+
31
+ let reason = $state<FlagReason | null>(null);
32
+ let note = $state('');
33
+ // Bumped to refresh the counts when the user copies/clears or saves a flag.
34
+ let storeBump = $state(0);
35
+
36
// Reset the form whenever the dialog is closed.
// This effect must not write `storeBump`: `storeBump++` both reads and writes
// the state, which makes the effect depend on its own write and re-run until
// Svelte aborts with `effect_update_depth_exceeded`. The counts below pick up
// "dialog just opened" by depending on `open` directly.
$effect(() => {
  if (!open) {
    reason = null;
    note = '';
  }
});

// Stored flag / validation counts shown in the "Local review data" footer.
// Both re-read the stores when the dialog opens (`open`) and whenever
// `storeBump` is incremented.
const flagsCount = $derived.by(() => {
  void open;
  void storeBump;
  return loadFlags().length;
});
const validationsCount = $derived.by(() => {
  void open;
  void storeBump;
  return loadValidations().length;
});
53
+
54
// Store a flag for the current candidate with the selected reason and the
// trimmed note (left out when blank), show a confirmation toast, notify the
// parent through `onChange`, and close the dialog. No-op without a reason.
function save() {
  if (!reason) return;
  const cleanedNote = note.trim();
  addFlag({ matchId, mapName, round, reason, note: cleanedNote || undefined });
  const match = FLAG_REASONS.find((r) => r.id === reason);
  const label = match?.label ?? 'Flagged';
  toast.success(`Flagged: ${label}`, {
    description: `match ${matchId} · ${mapName} · round ${round}`
  });
  onChange?.();
  open = false;
}
65
+
66
// Copy the exported review data to the clipboard as pretty-printed JSON and
// report the outcome with a toast. Any failure while serialising or writing
// to the clipboard ends up in the error toast.
async function copyAll() {
  const payload = exportReviews(queueLength);
  try {
    const text = JSON.stringify(payload, null, 2);
    await navigator.clipboard.writeText(text);
    const summary = `${payload.flags.length} flags · ${payload.validations.length} validations`;
    toast.success('Review data copied to clipboard', { description: summary });
  } catch {
    toast.error('Could not copy to clipboard');
  }
}
77
+
78
// Wipe both local stores (flags and validations), then refresh this dialog's
// counts via `storeBump`, notify the parent through `onChange`, and confirm
// with a toast.
function clearAll() {
  for (const wipe of [clearFlags, clearValidations]) wipe();
  storeBump++;
  onChange?.();
  toast.success('Cleared local review data');
}
85
+ </script>
86
+
87
+ <Dialog.Root bind:open>
88
+ <Dialog.Content
89
+ class="flex max-h-[min(80vh,40rem)] w-full max-w-xl flex-col gap-3 overflow-hidden p-0 sm:max-w-xl"
90
+ >
91
+ <Dialog.Header class="border-b px-4 pt-4 pb-3">
92
+ <Dialog.Title>Flag this candidate</Dialog.Title>
93
+ <Dialog.Description>
94
+ match {matchId} · {mapName} · round {round}. Pick a reason and (optionally) leave a note.
95
+ </Dialog.Description>
96
+ </Dialog.Header>
97
+
98
+ <div class="flex min-h-0 flex-col gap-3 overflow-y-auto px-4">
99
+ <div class="grid gap-1.5">
100
+ {#each FLAG_REASONS as r (r.id)}
101
+ <button
102
+ type="button"
103
+ data-active={reason === r.id || undefined}
104
+ class="flex flex-col items-start gap-0.5 rounded-md border border-border px-2.5 py-1.5 text-left text-xs transition hover:bg-muted/50 data-active:border-amber-500 data-active:bg-amber-500/10"
105
+ onclick={() => (reason = r.id)}
106
+ >
107
+ <span class="flex items-center gap-2">
108
+ <span class="font-medium text-foreground">{r.label}</span>
109
+ <span
110
+ data-severity={r.severity}
111
+ class="rounded-sm border px-1 py-0 text-[9px] font-semibold tracking-wider text-muted-foreground/80 uppercase data-[severity=major]:border-rose-500/30 data-[severity=major]:text-rose-600 dark:data-[severity=major]:text-rose-400"
112
+ >{r.severity}</span
113
+ >
114
+ </span>
115
+ <span class="text-[11px] leading-snug text-muted-foreground">{r.description}</span>
116
+ {#if r.examples?.length}
117
+ <span class="mt-1 flex flex-wrap items-center gap-x-2 gap-y-0.5 text-[10px]">
118
+ <span class="tracking-wider text-muted-foreground/70 uppercase">Examples:</span>
119
+ {#each r.examples as href, i (href)}
120
+ <a
121
+ {href}
122
+ target="_blank"
123
+ rel="noreferrer noopener"
124
+ onclick={(e) => e.stopPropagation()}
125
+ class="inline-flex items-center gap-0.5 text-muted-foreground underline-offset-2 hover:text-foreground hover:underline"
126
+ >
127
+ #{i + 1}
128
+ <ArrowSquareOutIcon size={10} weight="bold" />
129
+ </a>
130
+ {/each}
131
+ </span>
132
+ {/if}
133
+ </button>
134
+ {/each}
135
+ </div>
136
+
137
+ <div class="grid gap-1.5">
138
+ <Label for="flag-note" class="text-xs">Notes (optional)</Label>
139
+ <textarea
140
+ id="flag-note"
141
+ bind:value={note}
142
+ placeholder="e.g. player 4 spawn position is at mid instead of T spawn from t=0"
143
+ rows="3"
144
+ class="w-full resize-y rounded-md border border-input bg-input/20 px-2 py-1.5 text-xs outline-none focus-visible:border-ring focus-visible:ring-2 focus-visible:ring-ring/30 dark:bg-input/30"
145
+ ></textarea>
146
+ </div>
147
+
148
+ {#each KNOWN_MINOR_ISSUES as k (k.label)}
149
+ <div class="rounded-md border border-dashed border-muted-foreground/20 p-2.5 text-[11px]">
150
+ <div
151
+ class="mb-0.5 text-[10px] font-semibold tracking-wider text-muted-foreground/80 uppercase"
152
+ >
153
+ Don't flag — known minor
154
+ </div>
155
+ <div class="font-medium text-foreground">{k.label}</div>
156
+ <div class="mt-0.5 leading-snug text-muted-foreground">{k.description}</div>
157
+ {#if k.examples?.length}
158
+ <div class="mt-1 flex flex-wrap gap-2 text-[10px]">
159
+ {#each k.examples as href, i (href)}
160
+ <a
161
+ {href}
162
+ target="_blank"
163
+ rel="noreferrer noopener"
164
+ class="inline-flex items-center gap-0.5 text-muted-foreground underline-offset-2 hover:text-foreground hover:underline"
165
+ >
166
+ #{i + 1}
167
+ <ArrowSquareOutIcon size={10} weight="bold" />
168
+ </a>
169
+ {/each}
170
+ </div>
171
+ {/if}
172
+ </div>
173
+ {/each}
174
+
175
+ <div class="rounded-md border bg-muted/30 px-2.5 py-2 text-[11px]">
176
+ <div class="flex items-center justify-between gap-2">
177
+ <div>
178
+ <div class="font-medium text-foreground">Local review data</div>
179
+ <div class="mt-0.5 text-[10px] text-muted-foreground">
180
+ <span class="tabular-nums">{flagsCount}</span> flags ·
181
+ <span class="tabular-nums">{validationsCount}</span> validations · stored in
182
+ <code class="font-mono">localStorage</code> (persists across sessions)
183
+ </div>
184
+ </div>
185
+ <div class="flex shrink-0 items-center gap-1">
186
+ <Button variant="outline" size="sm" onclick={copyAll}>
187
+ <ClipboardIcon size={12} weight="duotone" /> Copy
188
+ </Button>
189
+ <Button
190
+ variant="ghost"
191
+ size="icon-sm"
192
+ onclick={clearAll}
193
+ aria-label="Clear local review data"
194
+ >
195
+ <TrashIcon size={12} weight="duotone" />
196
+ </Button>
197
+ </div>
198
+ </div>
199
+ </div>
200
+ </div>
201
+
202
+ <Dialog.Footer class="border-t px-4 pt-3 pb-4">
203
+ <Button variant="ghost" size="sm" onclick={() => (open = false)}>Cancel</Button>
204
+ <Button size="sm" onclick={save} disabled={!reason}>Save flag</Button>
205
+ </Dialog.Footer>
206
+ </Dialog.Content>
207
+ </Dialog.Root>
src/lib/components/eval-info-dialog.svelte DELETED
@@ -1,43 +0,0 @@
1
- <script lang="ts">
2
- import * as Dialog from '$lib/components/ui/dialog';
3
- import { FLAG_REASONS } from '$lib/eval';
4
-
5
- interface Props {
6
- open?: boolean;
7
- }
8
- let { open = $bindable(false) }: Props = $props();
9
- </script>
10
-
11
- <Dialog.Root bind:open>
12
- <Dialog.Content class="max-w-xl">
13
- <Dialog.Header>
14
- <Dialog.Title>How to evaluate</Dialog.Title>
15
- <Dialog.Description>
16
- The queue samples 3 rounds per match — round 1, the last round, and one deterministic round
17
- in the middle. Watch each candidate, flag any issue you spot, then click <span
18
- class="font-medium text-foreground">Next</span
19
- > to advance.
20
- </Dialog.Description>
21
- </Dialog.Header>
22
-
23
- <div class="mt-3 space-y-3 text-sm">
24
- <div>
25
- <div class="font-semibold text-foreground">Flag if you see:</div>
26
- <ul class="mt-1.5 space-y-1.5 text-xs/relaxed text-muted-foreground">
27
- {#each FLAG_REASONS as r (r.id)}
28
- <li class="flex flex-col gap-0.5">
29
- <span class="font-medium text-foreground">{r.label}</span>
30
- <span>{r.description}</span>
31
- </li>
32
- {/each}
33
- </ul>
34
- </div>
35
-
36
- <p class="border-t pt-3 text-xs text-muted-foreground">
37
- Flags are stored in your browser's <code class="font-mono">localStorage</code> under
38
- <code class="font-mono">opencs2:eval:flags:v1</code>. Export them by copying that key from
39
- devtools.
40
- </p>
41
- </div>
42
- </Dialog.Content>
43
- </Dialog.Root>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/lib/components/header.svelte CHANGED
@@ -11,7 +11,8 @@
11
  import HfLogo from '$lib/components/hf-logo.svelte';
12
  import { site } from '$lib/site';
13
  import type { Match } from '$lib/types';
14
- import { buildEvalQueue, evalUrl } from '$lib/eval';
 
15
 
16
  const inEval = $derived(page.url.searchParams.get('eval') === '1');
17
  const matches = $derived((page.data?.matches ?? []) as Match[]);
@@ -26,7 +27,15 @@
26
  }
27
  const queue = buildEvalQueue(matches);
28
  if (!queue.length) return;
29
- goto(evalUrl(queue[0], 0));
 
 
 
 
 
 
 
 
30
  }
31
  </script>
32
 
@@ -42,27 +51,29 @@
42
  </a>
43
 
44
  <div class="ml-auto flex items-center gap-1">
45
- <Tooltip.Root>
46
- <Tooltip.Trigger>
47
- {#snippet child({ props })}
48
- <Button
49
- {...props}
50
- variant="ghost"
51
- size="icon-sm"
52
- onclick={toggleEval}
53
- disabled={!matches.length}
54
- data-active={inEval || undefined}
55
- class="data-active:bg-amber-500/15 data-active:text-amber-700 dark:data-active:text-amber-300"
56
- aria-label={inEval ? 'Exit evaluation' : 'Start evaluation'}
57
- >
58
- <ListChecksIcon size={16} weight="duotone" />
59
- </Button>
60
- {/snippet}
61
- </Tooltip.Trigger>
62
- <Tooltip.Content side="bottom">
63
- {inEval ? 'Exit evaluation' : 'Start evaluation'}
64
- </Tooltip.Content>
65
- </Tooltip.Root>
 
 
66
  <Tooltip.Root>
67
  <Tooltip.Trigger>
68
  {#snippet child({ props })}
 
11
  import HfLogo from '$lib/components/hf-logo.svelte';
12
  import { site } from '$lib/site';
13
  import type { Match } from '$lib/types';
14
+ import { buildEvalQueue, evalUrl, firstUnreviewed, EVAL_ENABLED } from '$lib/eval';
15
+ import { toast } from 'svelte-sonner';
16
 
17
  const inEval = $derived(page.url.searchParams.get('eval') === '1');
18
  const matches = $derived((page.data?.matches ?? []) as Match[]);
 
27
  }
28
  const queue = buildEvalQueue(matches);
29
  if (!queue.length) return;
30
+ // Resume on the first un-reviewed candidate so prior sessions aren't replayed.
31
+ const start = firstUnreviewed(queue);
32
+ if (start < 0) {
33
+ toast.success('All eval candidates already reviewed', {
34
+ description: `${queue.length} total. Clear local data in the Flag dialog to redo.`
35
+ });
36
+ return;
37
+ }
38
+ goto(evalUrl(queue[start], start));
39
  }
40
  </script>
41
 
 
51
  </a>
52
 
53
  <div class="ml-auto flex items-center gap-1">
54
+ {#if EVAL_ENABLED}
55
+ <Tooltip.Root>
56
+ <Tooltip.Trigger>
57
+ {#snippet child({ props })}
58
+ <Button
59
+ {...props}
60
+ variant="ghost"
61
+ size="icon-sm"
62
+ onclick={toggleEval}
63
+ disabled={!matches.length}
64
+ data-active={inEval || undefined}
65
+ class="data-active:bg-amber-500/15 data-active:text-amber-700 dark:data-active:text-amber-300"
66
+ aria-label={inEval ? 'Exit evaluation' : 'Start evaluation'}
67
+ >
68
+ <ListChecksIcon size={16} weight="duotone" />
69
+ </Button>
70
+ {/snippet}
71
+ </Tooltip.Trigger>
72
+ <Tooltip.Content side="bottom">
73
+ {inEval ? 'Exit evaluation' : 'Start evaluation'}
74
+ </Tooltip.Content>
75
+ </Tooltip.Root>
76
+ {/if}
77
  <Tooltip.Root>
78
  <Tooltip.Trigger>
79
  {#snippet child({ props })}
src/lib/eval.ts CHANGED
@@ -1,6 +1,16 @@
1
  import { browser } from '$app/environment';
 
2
  import type { Match } from '$lib/types';
3
 
 
 
 
 
 
 
 
 
 
4
  export type EvalCandidate = {
5
  matchId: number;
6
  mapName: string;
@@ -8,45 +18,119 @@ export type EvalCandidate = {
8
  };
9
 
10
  export type FlagReason =
 
 
 
 
11
  | 'missing_audio'
12
  | 'av_misaligned'
13
  | 'pov_desync'
14
  | 'uninteresting'
15
- | 'missing_video'
16
  | 'other';
17
 
18
- export const FLAG_REASONS: { id: FlagReason; label: string; description: string }[] = [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  {
20
  id: 'missing_audio',
21
  label: 'Missing audio',
 
22
  description: 'No audio at all when there should be (gunfire, footsteps, callouts).'
23
  },
24
  {
25
  id: 'av_misaligned',
26
  label: 'Audio out of sync',
27
- description: 'Audio is offset from on-screen action — gunshots before the muzzle flash, etc.'
 
 
28
  },
29
  {
30
  id: 'pov_desync',
31
  label: 'POVs out of sync',
 
32
  description:
33
  'In grid mode, two players who should see the same moment are time-offset from each other.'
34
  },
35
  {
36
  id: 'uninteresting',
37
  label: 'Uninteresting gameplay',
 
38
  description: 'Pure AFK, intentional griefing, or otherwise unusable footage.'
39
  },
40
  {
41
- id: 'missing_video',
42
- label: 'Missing video',
 
 
 
 
 
 
 
 
 
 
43
  description:
44
- 'A POV stream stays blank after the round has buffered (i.e. not just a slow-network hiccup).'
 
 
 
 
 
45
  },
46
  {
47
- id: 'other',
48
- label: 'Other',
49
- description: 'Something else worth recording.'
50
  }
51
  ];
52
 
@@ -55,34 +139,77 @@ export type Flag = {
55
  mapName: string;
56
  round: number;
57
  reason: FlagReason;
 
 
 
 
 
 
 
 
58
  ts: number;
59
  };
60
 
61
  const FLAGS_KEY = 'opencs2:eval:flags:v1';
 
62
 
63
- export function loadFlags(): Flag[] {
64
  if (!browser) return [];
65
  try {
66
- return JSON.parse(localStorage.getItem(FLAGS_KEY) ?? '[]') as Flag[];
67
  } catch {
68
  return [];
69
  }
70
  }
71
-
72
- export function saveFlags(flags: Flag[]) {
73
  if (!browser) return;
74
- localStorage.setItem(FLAGS_KEY, JSON.stringify(flags));
75
  }
76
 
 
 
 
 
 
77
  export function addFlag(f: Omit<Flag, 'ts'>) {
78
  const flags = loadFlags();
79
  flags.push({ ...f, ts: Date.now() });
80
  saveFlags(flags);
81
  }
82
 
83
- // Mulberry32 small deterministic PRNG so the picked middle round is stable
84
- // for a given (match_id, map_name) and the eval set doesn't shift between
85
- // sessions.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  function prng(seed: number): number {
87
  let t = (seed + 0x6d2b79f5) | 0;
88
  t = Math.imul(t ^ (t >>> 15), t | 1);
@@ -91,9 +218,13 @@ function prng(seed: number): number {
91
  }
92
 
93
  /**
94
- * Evaluation policy: per (match, map), sample 3 rounds — first, last, and
95
- * one deterministic random round in the middle. Sorted by match_id, then
96
  * map_index to match the rest of the app's canonical order.
 
 
 
 
97
  */
98
  export function buildEvalQueue(matches: Match[]): EvalCandidate[] {
99
  const sorted = matches
@@ -106,10 +237,18 @@ export function buildEvalQueue(matches: Match[]): EvalCandidate[] {
106
  if (!total || total < 1) continue;
107
 
108
  const picks = new Set<number>([1, total]);
109
- if (total > 2) {
110
- const seed = m.match_id * 31 + m.map_name.length * 17 + (m.map_index ?? 0);
 
111
  const span = total - 2; // pick from [2, total - 1]
112
- picks.add(2 + Math.floor(prng(seed) * span));
 
 
 
 
 
 
 
113
  }
114
 
115
  for (const round of [...picks].sort((a, b) => a - b)) {
@@ -130,6 +269,33 @@ export function indexOfCandidate(
130
  );
131
  }
132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  export function evalUrl(c: EvalCandidate, i: number): string {
134
  const params = new URLSearchParams({
135
  round: String(c.round),
@@ -140,3 +306,20 @@ export function evalUrl(c: EvalCandidate, i: number): string {
140
  });
141
  return `/match/${encodeURIComponent(c.matchId)}/${encodeURIComponent(c.mapName)}?${params}`;
142
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import { browser } from '$app/environment';
2
+ import { env } from '$env/dynamic/public';
3
  import type { Match } from '$lib/types';
4
 
5
+ /**
6
+ * Build-time switch. Set `PUBLIC_DISABLE_EVAL=1` (or `=true`) before running
7
+ * `bun run build` to ship a viewer with the entire evaluation surface
8
+ * (header button, eval bar, flag dialog) hidden. The eval module stays in
9
+ * the bundle but is never rendered.
10
+ */
11
+ export const EVAL_ENABLED =
12
+ env.PUBLIC_DISABLE_EVAL !== '1' && env.PUBLIC_DISABLE_EVAL?.toLowerCase() !== 'true';
13
+
14
  export type EvalCandidate = {
15
  matchId: number;
16
  mapName: string;
 
18
  };
19
 
20
  export type FlagReason =
21
+ | 'victory_screen'
22
+ | 'wrong_initial_position'
23
+ | 'no_animation'
24
+ | 'missing_video'
25
  | 'missing_audio'
26
  | 'av_misaligned'
27
  | 'pov_desync'
28
  | 'uninteresting'
 
29
  | 'other';
30
 
31
+ export type FlagSeverity = 'major' | 'minor';
32
+
33
+ export type FlagReasonInfo = {
34
+ id: FlagReason;
35
+ label: string;
36
+ description: string;
37
+ severity: FlagSeverity;
38
+ examples?: string[];
39
+ };
40
+
41
+ // Severity is informational — both major and minor are still flaggable. The
42
+ // `examples` URLs are stable links to a representative case so a reviewer can
43
+ // confirm what the failure mode looks like.
44
+ export const FLAG_REASONS: FlagReasonInfo[] = [
45
+ {
46
+ id: 'victory_screen',
47
+ label: 'Victory screen instead of POV',
48
+ severity: 'major',
49
+ description:
50
+ "Round-end / scoreboard screen renders in place of the player's first-person view, usually for the whole round on the same player slot. Renders are essentially unusable.",
51
+ examples: [
52
+ 'https://blanchon-opencs2-dataset-viewer.hf.space/match/2393397/de_overpass?round=1&player=7&view=grid'
53
+ ]
54
+ },
55
+ {
56
+ id: 'wrong_initial_position',
57
+ label: 'Wrong initial position',
58
+ severity: 'major',
59
+ description:
60
+ 'Player is not at their spawn point at the very first tick of the round (most visible at t=0).',
61
+ examples: [
62
+ 'https://blanchon-opencs2-dataset-viewer.hf.space/match/2392873/de_dust2?round=1&player=2&view=grid',
63
+ 'https://blanchon-opencs2-dataset-viewer.hf.space/match/2392873/de_mirage?round=1&player=0&view=grid'
64
+ ]
65
+ },
66
+ {
67
+ id: 'no_animation',
68
+ label: 'No animation',
69
+ severity: 'major',
70
+ description:
71
+ 'Player or world animations stop playing — character moves through space but limbs / weapons / world stay frozen.',
72
+ examples: [
73
+ 'https://blanchon-opencs2-dataset-viewer.hf.space/match/2392131/de_mirage?round=1&player=2'
74
+ ]
75
+ },
76
+ {
77
+ id: 'missing_video',
78
+ label: 'Missing video',
79
+ severity: 'major',
80
+ description:
81
+ 'A POV stream stays blank after the round has buffered (i.e. not just a slow-network hiccup).'
82
+ },
83
  {
84
  id: 'missing_audio',
85
  label: 'Missing audio',
86
+ severity: 'major',
87
  description: 'No audio at all when there should be (gunfire, footsteps, callouts).'
88
  },
89
  {
90
  id: 'av_misaligned',
91
  label: 'Audio out of sync',
92
+ severity: 'major',
93
+ description:
94
+ 'Audio is offset from on-screen action — gunshots before the muzzle flash, footsteps lagging the movement, etc.'
95
  },
96
  {
97
  id: 'pov_desync',
98
  label: 'POVs out of sync',
99
+ severity: 'major',
100
  description:
101
  'In grid mode, two players who should see the same moment are time-offset from each other.'
102
  },
103
  {
104
  id: 'uninteresting',
105
  label: 'Uninteresting gameplay',
106
+ severity: 'minor',
107
  description: 'Pure AFK, intentional griefing, or otherwise unusable footage.'
108
  },
109
  {
110
+ id: 'other',
111
+ label: 'Other',
112
+ severity: 'minor',
113
+ description: 'Something else worth recording — use the notes field to describe.'
114
+ }
115
+ ];
116
+
117
+ // Cosmetic / known issues that look like problems but aren't — listed in the
118
+ // flag dialog so reviewers stop reporting them.
119
+ export const KNOWN_MINOR_ISSUES: { label: string; description: string; examples?: string[] }[] = [
120
+ {
121
+ label: '"Terrorist/CT win" tail at round start',
122
  description:
123
+ 'A short sting from the previous round can leak into the start of the next round. Cosmetic, not a render bug — please skip.',
124
+ examples: [
125
+ 'https://blanchon-opencs2-dataset-viewer.hf.space/match/2392131/de_mirage?round=16&player=0&view=grid',
126
+ 'https://blanchon-opencs2-dataset-viewer.hf.space/match/2393398/de_dust2?round=16&player=4&view=grid',
127
+ 'https://blanchon-opencs2-dataset-viewer.hf.space/match/2393178/de_dust2?round=27&player=0&view=grid'
128
+ ]
129
  },
130
  {
131
+ label: 'Recording ends right at death',
132
+ description:
133
+ "Each POV is cut on the exact frame the player dies. On headshots that can feel like the recording ended too soon — it's intentional, to avoid the camera snapping to the killer or glitching out around the death tick."
134
  }
135
  ];
136
 
 
139
  mapName: string;
140
  round: number;
141
  reason: FlagReason;
142
+ note?: string;
143
+ ts: number;
144
+ };
145
+
146
+ export type Validation = {
147
+ matchId: number;
148
+ mapName: string;
149
+ round: number;
150
  ts: number;
151
  };
152
 
153
  const FLAGS_KEY = 'opencs2:eval:flags:v1';
154
+ const VALIDATIONS_KEY = 'opencs2:eval:validations:v1';
155
 
156
/**
 * Read a JSON array stored under `key` in localStorage.
 *
 * Returns `[]` when not running in the browser, when the key is absent, when
 * the stored text is not valid JSON, or when it parses to anything other than
 * an array (e.g. `null` or an object), so callers can always use `.length` and
 * `.push`. Element shapes are not validated; the cast to `T[]` is trusted.
 */
function loadJson<T>(key: string): T[] {
  if (!browser) return [];
  try {
    const parsed: unknown = JSON.parse(localStorage.getItem(key) ?? '[]');
    return Array.isArray(parsed) ? (parsed as T[]) : [];
  } catch {
    return [];
  }
}
164
// Serialise `v` as JSON under `key` in localStorage; does nothing when not
// running in the browser.
function saveJson<T>(key: string, v: T[]) {
  if (browser) {
    localStorage.setItem(key, JSON.stringify(v));
  }
}
168
 
169
// Typed accessors for the two persisted review stores (flags, validations).
export function loadFlags(): Flag[] {
  return loadJson<Flag>(FLAGS_KEY);
}
export function saveFlags(v: Flag[]) {
  return saveJson(FLAGS_KEY, v);
}
export function loadValidations(): Validation[] {
  return loadJson<Validation>(VALIDATIONS_KEY);
}
export function saveValidations(v: Validation[]) {
  return saveJson(VALIDATIONS_KEY, v);
}
173
+
174
  export function addFlag(f: Omit<Flag, 'ts'>) {
175
  const flags = loadFlags();
176
  flags.push({ ...f, ts: Date.now() });
177
  saveFlags(flags);
178
  }
179
 
180
/**
 * Record that a candidate was reviewed without being flagged.
 *
 * Idempotent per (matchId, mapName, round): when a validation with the same
 * key is already stored, nothing is written and its timestamp is kept.
 * Keys are built with `candidateKey`, the same helper `reviewedKeySet` uses.
 */
export function addValidation(v: Omit<Validation, 'ts'>) {
  const validations = loadValidations();
  const key = candidateKey(v);
  if (validations.some((x) => candidateKey(x) === key)) return;
  validations.push({ ...v, ts: Date.now() });
  saveValidations(validations);
}
187
+
188
/** Delete the stored flag list. Does nothing outside the browser. */
export function clearFlags() {
	if (!browser) return;
	localStorage.removeItem(FLAGS_KEY);
}
191
/** Delete the stored validation list. Does nothing outside the browser. */
export function clearValidations() {
	if (!browser) return;
	localStorage.removeItem(VALIDATIONS_KEY);
}
194
+
195
/** Build the `matchId|mapName|round` string that identifies one candidate. */
function candidateKey(c: { matchId: number; mapName: string; round: number }): string {
	const { matchId, mapName, round } = c;
	return `${matchId}|${mapName}|${round}`;
}
197
+
198
/**
 * Collect the keys of every candidate that counts as "reviewed": anything
 * with a stored flag or a stored validation. Eval-mode navigation uses this
 * set to skip candidates that were already seen.
 *
 * @returns Set of `candidateKey` strings read from both localStorage stores.
 */
export function reviewedKeySet(): Set<string> {
	const entries = [...loadFlags(), ...loadValidations()];
	return new Set<string>(entries.map((entry) => candidateKey(entry)));
}
209
+
210
+ // Mulberry32 — small deterministic PRNG so the picked middle rounds are
211
+ // stable for a given (match_id, map_name) and the eval set doesn't shift
212
+ // between sessions.
213
  function prng(seed: number): number {
214
  let t = (seed + 0x6d2b79f5) | 0;
215
  t = Math.imul(t ^ (t >>> 15), t | 1);
 
218
  }
219
 
220
  /**
221
+ * Evaluation policy: per (match, map), sample up to 4 rounds — first, last, and
222
+ * two deterministic random rounds in the middle. Sorted by match_id then
223
  * map_index to match the rest of the app's canonical order.
224
+ *
225
+ * Validating ALL 4 candidates of a (match, map) without flagging implies the
226
+ * whole match-map is good; flagging any one of the 4 marks the match-map as
227
+ * having issues.
228
  */
229
  export function buildEvalQueue(matches: Match[]): EvalCandidate[] {
230
  const sorted = matches
 
237
  if (!total || total < 1) continue;
238
 
239
  const picks = new Set<number>([1, total]);
240
+ // Two PRNG-picked middle rounds when there's room for them.
241
+ if (total >= 4) {
242
+ const seedBase = m.match_id * 31 + m.map_name.length * 17 + (m.map_index ?? 0);
243
  const span = total - 2; // pick from [2, total - 1]
244
+ let attempts = 0;
245
+ while (picks.size < 4 && attempts < 16) {
246
+ const r = 2 + Math.floor(prng(seedBase + attempts) * span);
247
+ picks.add(r);
248
+ attempts++;
249
+ }
250
+ } else if (total === 3) {
251
+ picks.add(2);
252
  }
253
 
254
  for (const round of [...picks].sort((a, b) => a - b)) {
 
269
  );
270
  }
271
 
272
/**
 * Find the next un-reviewed candidate strictly after `fromIndex`.
 *
 * @param queue Ordered eval candidates.
 * @param fromIndex Current position; the scan starts at the following index.
 *   Values below -1 are treated like -1 (scan from the start) and fractional
 *   values are floored, so the scan never indexes outside `queue`.
 * @param reviewed Keys to skip; defaults to the keys currently stored in
 *   localStorage (flags and validations).
 * @returns Index into `queue`, or -1 if every remaining candidate has been
 *   reviewed.
 */
export function nextUnreviewed(
	queue: EvalCandidate[],
	fromIndex: number,
	reviewed: Set<string> = reviewedKeySet()
): number {
	// Clamp the start: an out-of-range or non-integer cursor would otherwise
	// read `queue[i]` as undefined and throw while building its key.
	const start = Math.max(0, Math.floor(fromIndex) + 1);
	for (let i = start; i < queue.length; i++) {
		if (!reviewed.has(candidateKey(queue[i]))) return i;
	}
	return -1;
}
287
+
288
/**
 * Locate the earliest candidate in `queue` that has not been reviewed.
 *
 * @param queue Ordered eval candidates.
 * @param reviewed Keys to skip; defaults to the keys currently stored in
 *   localStorage (flags and validations).
 * @returns Index into `queue`, or -1 if everything is done.
 */
export function firstUnreviewed(
	queue: EvalCandidate[],
	reviewed: Set<string> = reviewedKeySet()
): number {
	return queue.findIndex((candidate) => !reviewed.has(candidateKey(candidate)));
}
298
+
299
  export function evalUrl(c: EvalCandidate, i: number): string {
300
  const params = new URLSearchParams({
301
  round: String(c.round),
 
306
  });
307
  return `/match/${encodeURIComponent(c.matchId)}/${encodeURIComponent(c.mapName)}?${params}`;
308
  }
309
+
310
/** Shape of the snapshot produced by `exportReviews`. */
export type ReviewExport = {
	/** When the snapshot was taken, as an ISO 8601 string. */
	exportedAt: string;
	/** Length of the eval queue supplied by the caller at export time. */
	totalCandidates: number;
	/** Every flag currently stored. */
	flags: Flag[];
	/** Every validation currently stored. */
	validations: Validation[];
};
316
+
317
/**
 * Capture the current review state held in localStorage so it can be shared
 * or exported.
 *
 * @param queueLength Size of the eval queue, recorded as `totalCandidates`.
 * @returns Snapshot with an ISO timestamp plus all stored flags and validations.
 */
export function exportReviews(queueLength: number): ReviewExport {
	const exportedAt = new Date().toISOString();
	const flags = loadFlags();
	const validations = loadValidations();
	return { exportedAt, totalCandidates: queueLength, flags, validations };
}
src/routes/match/[matchId]/[mapName]/+page.svelte CHANGED
@@ -4,7 +4,7 @@
4
  import { goto } from '$app/navigation';
5
  import Header from '$lib/components/header.svelte';
6
  import EvalBar from '$lib/components/eval-bar.svelte';
7
- import { buildEvalQueue, indexOfCandidate } from '$lib/eval';
8
  import RoundList from '$lib/components/round-list.svelte';
9
  import PlayerGrid from '$lib/components/player-grid.svelte';
10
  import VideoStage from '$lib/components/video-stage.svelte';
@@ -79,7 +79,7 @@
79
 
80
  // Evaluation mode driven by ?eval=1&i=N. Queue is deterministic from the
81
  // match list so reloading or sharing a URL still lands on the same item.
82
- const inEvalMode = $derived(page.url.searchParams.get('eval') === '1');
83
  const evalQueue = $derived(inEvalMode ? buildEvalQueue(data.matches) : []);
84
  const evalIndex = $derived.by(() => {
85
  if (!inEvalMode) return -1;
 
4
  import { goto } from '$app/navigation';
5
  import Header from '$lib/components/header.svelte';
6
  import EvalBar from '$lib/components/eval-bar.svelte';
7
+ import { buildEvalQueue, indexOfCandidate, EVAL_ENABLED } from '$lib/eval';
8
  import RoundList from '$lib/components/round-list.svelte';
9
  import PlayerGrid from '$lib/components/player-grid.svelte';
10
  import VideoStage from '$lib/components/video-stage.svelte';
 
79
 
80
  // Evaluation mode driven by ?eval=1&i=N. Queue is deterministic from the
81
  // match list so reloading or sharing a URL still lands on the same item.
82
+ const inEvalMode = $derived(EVAL_ENABLED && page.url.searchParams.get('eval') === '1');
83
  const evalQueue = $derived(inEvalMode ? buildEvalQueue(data.matches) : []);
84
  const evalIndex = $derived.by(() => {
85
  if (!inEvalMode) return -1;