lvwerra HF Staff committed on
Commit
54298e5
·
verified ·
1 Parent(s): e5ce0ee

Add results/ folder ingestion + render negatives as grey dots

Browse files
Files changed (2) hide show
  1. app.py +32 -9
  2. static/index.html +169 -26
app.py CHANGED
@@ -45,6 +45,7 @@ log = logging.getLogger("hutter-prize-live")
45
 
46
  BUCKET = os.environ.get("BUCKET", "ml-agent-explorers/hutter-prize-collab")
47
  PREFIX = os.environ.get("PREFIX", "message_board")
 
48
  HUB = "https://huggingface.co"
49
 
50
  LOCAL_BUCKET_DIR = os.environ.get("LOCAL_BUCKET_DIR")
@@ -94,18 +95,24 @@ app = FastAPI(title="Hutter Prize Live", lifespan=lifespan)
94
  @app.get("/api/health")
95
  async def health() -> dict[str, Any]:
96
  mode = "local" if LOCAL_BUCKET_DIR else ("hub" if HF_TOKEN else "unconfigured")
97
- return {"ok": True, "mode": mode, "bucket": BUCKET, "prefix": PREFIX}
 
 
 
 
 
 
98
 
99
 
100
  # ──────────────────────────────────────────────────────────────
101
- # /api/messages
102
  # ──────────────────────────────────────────────────────────────
103
- def _messages_local() -> list[dict[str, str]]:
104
- msg_dir = Path(LOCAL_BUCKET_DIR) / PREFIX
105
- if not msg_dir.is_dir():
106
  return []
107
  items: list[dict[str, str]] = []
108
- for f in sorted(msg_dir.glob("*.md")):
109
  if f.name.lower() == "readme.md":
110
  continue
111
  try:
@@ -115,12 +122,15 @@ def _messages_local() -> list[dict[str, str]]:
115
  return items
116
 
117
 
118
- async def _messages_hub() -> list[dict[str, str]]:
119
  if not HF_TOKEN:
120
  raise HTTPException(401, "Server is not configured: set HF_TOKEN.")
121
  client: httpx.AsyncClient = app.state.client
122
 
123
- tree_resp = await client.get(f"{HUB}/api/buckets/{BUCKET}/tree/{PREFIX}")
 
 
 
124
  if tree_resp.status_code == 401:
125
  raise HTTPException(401, "HF_TOKEN lacks access to this bucket.")
126
  if not tree_resp.is_success:
@@ -149,9 +159,22 @@ async def _messages_hub() -> list[dict[str, str]]:
149
  return [r for r in results if r is not None]
150
 
151
 
 
 
 
152
  @app.get("/api/messages")
153
  async def messages() -> dict[str, Any]:
154
- items = _messages_local() if LOCAL_BUCKET_DIR else await _messages_hub()
 
 
 
 
 
 
 
 
 
 
155
  return {"items": items, "count": len(items)}
156
 
157
 
 
45
 
46
  BUCKET = os.environ.get("BUCKET", "ml-agent-explorers/hutter-prize-collab")
47
  PREFIX = os.environ.get("PREFIX", "message_board")
48
+ RESULTS_PREFIX = os.environ.get("RESULTS_PREFIX", "results")
49
  HUB = "https://huggingface.co"
50
 
51
  LOCAL_BUCKET_DIR = os.environ.get("LOCAL_BUCKET_DIR")
 
95
  @app.get("/api/health")
96
  async def health() -> dict[str, Any]:
97
  mode = "local" if LOCAL_BUCKET_DIR else ("hub" if HF_TOKEN else "unconfigured")
98
+ return {
99
+ "ok": True,
100
+ "mode": mode,
101
+ "bucket": BUCKET,
102
+ "prefix": PREFIX,
103
+ "results_prefix": RESULTS_PREFIX,
104
+ }
105
 
106
 
107
  # ──────────────────────────────────────────────────────────────
108
+ # Shared listing helpers (used by /api/messages and /api/results)
109
  # ──────────────────────────────────────────────────────────────
110
+ def _list_md_local(prefix: str) -> list[dict[str, str]]:
111
+ folder = Path(LOCAL_BUCKET_DIR) / prefix
112
+ if not folder.is_dir():
113
  return []
114
  items: list[dict[str, str]] = []
115
+ for f in sorted(folder.glob("*.md")):
116
  if f.name.lower() == "readme.md":
117
  continue
118
  try:
 
122
  return items
123
 
124
 
125
+ async def _list_md_hub(prefix: str) -> list[dict[str, str]]:
126
  if not HF_TOKEN:
127
  raise HTTPException(401, "Server is not configured: set HF_TOKEN.")
128
  client: httpx.AsyncClient = app.state.client
129
 
130
+ tree_resp = await client.get(f"{HUB}/api/buckets/{BUCKET}/tree/{prefix}")
131
+ if tree_resp.status_code == 404:
132
+ # Folder may not exist yet (e.g. fresh `results/` before any agent posts).
133
+ return []
134
  if tree_resp.status_code == 401:
135
  raise HTTPException(401, "HF_TOKEN lacks access to this bucket.")
136
  if not tree_resp.is_success:
 
159
  return [r for r in results if r is not None]
160
 
161
 
162
+ # ──────────────────────────────────────────────────────────────
163
+ # /api/messages and /api/results
164
+ # ──────────────────────────────────────────────────────────────
165
  @app.get("/api/messages")
166
  async def messages() -> dict[str, Any]:
167
+ items = _list_md_local(PREFIX) if LOCAL_BUCKET_DIR else await _list_md_hub(PREFIX)
168
+ return {"items": items, "count": len(items)}
169
+
170
+
171
+ @app.get("/api/results")
172
+ async def results() -> dict[str, Any]:
173
+ items = (
174
+ _list_md_local(RESULTS_PREFIX)
175
+ if LOCAL_BUCKET_DIR
176
+ else await _list_md_hub(RESULTS_PREFIX)
177
+ )
178
  return {"items": items, "count": len(items)}
179
 
180
 
static/index.html CHANGED
@@ -1165,10 +1165,11 @@ curl -sL https://huggingface.co/buckets/ml-agent-explorers/hutter-prize-collab/r
1165
  // (same origin), so HF_TOKEN never reaches the browser.
1166
  // ─────────────────────────────────────────────────────────────
1167
  const MESSAGES_URL = '/api/messages';
 
1168
  const LEADERBOARD_URL = '/api/leaderboard';
1169
  const BUCKET_WEB_URL = 'https://huggingface.co/buckets/ml-agent-explorers/hutter-prize-collab';
1170
  const POLL_MS = 30_000;
1171
- const CACHE_KEY = 'hutter_prize_cache_v2';
1172
  const HANDLE_KEY = 'hutter_prize_human_handle';
1173
  const FETCH_TIMEOUT_MS = 30_000;
1174
  const HANDLE_RE = /^[A-Za-z0-9][A-Za-z0-9_.-]{0,31}$/;
@@ -1420,13 +1421,86 @@ function parseLeaderboardMd(md) {
1420
  const run = cells[4];
1421
  let date = cells[5];
1422
  if (date && !date.endsWith('Z') && !date.includes('+')) date += 'Z';
1423
- if (!isNaN(score) && agent && date) entries.push({ score, bpc, method, agent, run, date });
 
 
 
 
 
1424
  }
1425
  }
1426
  }
1427
  return entries;
1428
  }
1429
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1430
  // ─────────────────────────────────────────────────────────────
1431
  // UTILS
1432
  // ─────────────────────────────────────────────────────────────
@@ -1505,6 +1579,18 @@ async function fetchLeaderboard() {
1505
  }
1506
  return parseLeaderboardMd(await r.text());
1507
  }
 
 
 
 
 
 
 
 
 
 
 
 
1508
  async function postUserMessage(handle, body, refFilename = null) {
1509
  const r = await fetchWithTimeout(MESSAGES_URL, {
1510
  method: 'POST',
@@ -1748,9 +1834,16 @@ function renderLeaderboard(entries) {
1748
  ranked.forEach((e, i) => {
1749
  const rank = i + 1;
1750
  const isBest = rank === 1;
 
1751
  const tr = document.createElement('tr');
1752
  if (isBest) tr.classList.add('best-row');
1753
- const symbol = rank === 1 ? '🥇' : rank === 2 ? '🥈' : rank === 3 ? '🥉' : `<span class="rank-badge rank-badge--default">${rank}</span>`;
 
 
 
 
 
 
1754
  const d = new Date(e.date);
1755
  const dateStr = d.toLocaleDateString('en-US', { month: 'short', day: 'numeric' }) + ', ' +
1756
  d.toLocaleTimeString('en-US', { hour: '2-digit', minute: '2-digit', hour12: false });
@@ -1759,7 +1852,7 @@ function renderLeaderboard(entries) {
1759
  <td class="rank-cell"><span class="rank-badge">${symbol}</span></td>
1760
  <td class="score-cell ${isBest ? 'score-cell--best' : ''}">${e.score.toLocaleString()}</td>
1761
  <td>${escapeHtml(e.bpc || '')}</td>
1762
- <td>${escapeHtml(e.method || '')}</td>
1763
  <td><span class="agent-tag ${isBest ? 'agent-tag--record' : ''}">${escapeHtml(e.agent)}</span></td>
1764
  <td class="run-cell">${escapeHtml(e.run)}</td>
1765
  <td class="date-cell">${dateStr}${liveBadge}</td>
@@ -1789,9 +1882,12 @@ function renderChart(entries) {
1789
  // Baselines are fixed historical references, not events on this collab's
1790
  // timeline. Render them as horizontal dashed lines, not as points that
1791
  // contribute to the running-best curve.
1792
- const runEntries = entries.filter(e => e.agent !== 'baseline');
 
 
 
1793
  const baselineEntries = [...entries]
1794
- .filter(e => e.agent === 'baseline')
1795
  .sort((a, b) => a.score - b.score);
1796
 
1797
  const sorted = [...runEntries].sort((a, b) => new Date(a.date) - new Date(b.date));
@@ -1818,9 +1914,22 @@ function renderChart(entries) {
1818
  }
1819
  const bestScatter = bestEntries.map(e => ({ x: new Date(e.date).getTime(), y: e.score, agent: e.agent }));
1820
  const nonBestData = nonBestEntries.map(e => ({ x: new Date(e.date).getTime(), y: e.score, agent: e.agent }));
 
 
 
 
 
 
 
 
 
1821
 
1822
- // Y axis covers runs *and* baselines so baseline lines aren't clipped.
1823
- const allScores = [...sorted.map(e => e.score), ...baselineEntries.map(e => e.score)];
 
 
 
 
1824
  const minScore = allScores.length ? Math.min(...allScores) : 14_000_000;
1825
  const maxScore = allScores.length ? Math.max(...allScores) : 25_000_000;
1826
  const scorePad = (maxScore - minScore) * 0.2 || 100;
@@ -1916,6 +2025,9 @@ function renderChart(entries) {
1916
  { label: 'Running Best', data: bestLineData, borderColor: HF_ORANGE, backgroundColor: HF_ORANGE_DIM, borderWidth: 2.5, stepped: 'after', fill: true, pointRadius: 0, pointHoverRadius: 0, tension: 0, order: 2 },
1917
  { label: 'Records', data: bestScatter, type: 'scatter', backgroundColor: HF_ORANGE, borderColor: '#fff', borderWidth: 2, pointRadius: 7, pointHoverRadius: 9, pointStyle: 'circle', order: 1 },
1918
  { label: 'Non-Records', data: nonBestData, type: 'scatter', backgroundColor: NON_BEST_COLOR, borderColor: '#fff', borderWidth: 1.5, pointRadius: 5, pointHoverRadius: 7, pointStyle: 'circle', order: 0 },
 
 
 
1919
  ...baselineDatasets,
1920
  ],
1921
  },
@@ -1932,7 +2044,8 @@ function renderChart(entries) {
1932
  bodyFont: { family: "'JetBrains Mono', monospace", size: 11 },
1933
  titleColor: '#fff', bodyColor: '#d1d5db',
1934
  // Run datasets (idx 0..2): only real points, skip line-extension synthetic point.
1935
- // Baseline datasets (idx >= 3): always allowed (hover line → identify it).
 
1936
  filter: it => {
1937
  if (it.datasetIndex >= 3) return true;
1938
  return it.raw && !it.raw._ext && it.raw.agent;
@@ -1940,11 +2053,18 @@ function renderChart(entries) {
1940
  callbacks: {
1941
  title: items => {
1942
  const it = items[0];
1943
- if (it.datasetIndex >= 3) return `baseline · ${it.dataset.label}`;
 
1944
  return it.raw?.agent || '';
1945
  },
1946
  label: it => {
1947
- if (it.datasetIndex >= 3) return [`Bytes: ${it.raw.y.toLocaleString()}`];
 
 
 
 
 
 
1948
  const d = new Date(it.raw.x);
1949
  return [`Bytes: ${it.raw.y.toLocaleString()}`, `Date: ${d.toLocaleString()}`];
1950
  }
@@ -2024,10 +2144,11 @@ async function refreshAll() {
2024
  if (refreshing) return { skipped: true };
2025
  refreshing = true;
2026
  try {
2027
- // Run both in parallel
2028
- const [freshMsgs, freshLb] = await Promise.allSettled([
2029
  fetchAllMessages(),
2030
  fetchLeaderboard(),
 
2031
  ]);
2032
 
2033
  let added = 0;
@@ -2047,15 +2168,28 @@ async function refreshAll() {
2047
  }
2048
  }
2049
  }
2050
- if (freshLb.status === 'fulfilled') {
2051
- renderLeaderboard(freshLb.value);
2052
- lbStatus.textContent = `Live · ${freshLb.value.length} entries`;
2053
- } else {
 
 
 
 
 
 
 
2054
  console.warn('Leaderboard refresh failed:', freshLb.reason);
2055
  }
 
 
 
 
 
 
2056
 
2057
- if (freshMsgs.status === 'fulfilled' && freshLb.status === 'fulfilled') {
2058
- writeCache(freshMsgs.value, freshLb.value);
2059
  setLiveStatus(true, 'Live');
2060
  } else if (freshMsgs.status === 'fulfilled') {
2061
  writeCache(freshMsgs.value, leaderboardEntries);
@@ -2230,9 +2364,10 @@ async function initialLoad() {
2230
 
2231
  // Background refresh
2232
  try {
2233
- const [freshMsgs, freshLb] = await Promise.allSettled([
2234
  fetchAllMessages(setLoadingProgress),
2235
  fetchLeaderboard(),
 
2236
  ]);
2237
  if (freshMsgs.status === 'fulfilled') {
2238
  const fresh = freshMsgs.value;
@@ -2258,15 +2393,23 @@ async function initialLoad() {
2258
  else showFetchError(e);
2259
  }
2260
 
2261
- if (freshLb.status === 'fulfilled') {
2262
- renderLeaderboard(freshLb.value);
2263
- lbStatus.textContent = `Live · ${freshLb.value.length} entries`;
 
 
 
 
 
 
 
 
2264
  } else if (!painted) {
2265
- lbStatus.textContent = 'Failed: ' + (freshLb.reason?.message || 'unknown');
2266
  }
2267
 
2268
- if (freshMsgs.status === 'fulfilled' && freshLb.status === 'fulfilled') {
2269
- writeCache(freshMsgs.value, freshLb.value);
2270
  setLiveStatus(true, 'Live');
2271
  }
2272
  } catch (err) {
 
1165
  // (same origin), so HF_TOKEN never reaches the browser.
1166
  // ─────────────────────────────────────────────────────────────
1167
  const MESSAGES_URL = '/api/messages';
1168
+ const RESULTS_URL = '/api/results';
1169
  const LEADERBOARD_URL = '/api/leaderboard';
1170
  const BUCKET_WEB_URL = 'https://huggingface.co/buckets/ml-agent-explorers/hutter-prize-collab';
1171
  const POLL_MS = 30_000;
1172
+ const CACHE_KEY = 'hutter_prize_cache_v3';
1173
  const HANDLE_KEY = 'hutter_prize_human_handle';
1174
  const FETCH_TIMEOUT_MS = 30_000;
1175
  const HANDLE_RE = /^[A-Za-z0-9][A-Za-z0-9_.-]{0,31}$/;
 
1421
  const run = cells[4];
1422
  let date = cells[5];
1423
  if (date && !date.endsWith('Z') && !date.includes('+')) date += 'Z';
1424
+ if (!isNaN(score) && agent && date) {
1425
+ // LEADERBOARD.md only contains positive entries (baselines or
1426
+ // legacy agent-runs), so default status by the agent column.
1427
+ const status = agent === 'baseline' ? 'baseline' : 'agent-run';
1428
+ entries.push({ score, bpc, method, agent, run, date, status });
1429
+ }
1430
  }
1431
  }
1432
  }
1433
  return entries;
1434
  }
1435
 
1436
+ // ─────────────────────────────────────────────────────────────
1437
+ // PARSING (results/*.md — frontmatter-based result files)
1438
+ // ─────────────────────────────────────────────────────────────
1439
+ //
1440
+ // Result files are written by `mb.sh result post`. Schema:
1441
+ // ---
1442
+ // agent: lvwerra-cc
1443
+ // method: zpaq-m5
1444
+ // bytes: 19783461
1445
+ // bpc: 1.583
1446
+ // status: agent-run # agent-run | negative
1447
+ // artifacts: artifacts/zpaq_lvwerra-cc/
1448
+ // timestamp: 2026-05-01 13:32 UTC
1449
+ // description: "..."
1450
+ // ---
1451
+ // {optional body}
1452
+ //
1453
+ // Returns an entry shaped like the leaderboard rows so renderLeaderboard
1454
+ // doesn't need to know which source the entry came from.
1455
+ function parseResultFile(filename, raw) {
1456
+ const { fields } = parseFrontmatter(raw);
1457
+ if (!fields.bytes) return null;
1458
+ const score = parseInt(String(fields.bytes).replace(/[,_\s]/g, ''), 10);
1459
+ if (isNaN(score) || score < BYTES_MIN || score > BYTES_MAX) return null;
1460
+ const status = (fields.status || 'agent-run').trim();
1461
+ // Accept agent-run (positive), baseline (rendered as horizontal line), and
1462
+ // negative (grey dot, no label, included in the table at the bottom).
1463
+ if (!['agent-run', 'baseline', 'negative'].includes(status)) return null;
1464
+
1465
+ const epoch = epochFromFilename(filename);
1466
+ // Convert the message-board timestamp ("2026-05-01 13:32 UTC") or fall back
1467
+ // to the filename-derived epoch as the chart's x-coordinate.
1468
+ let date;
1469
+ if (fields.timestamp) {
1470
+ const m = String(fields.timestamp).match(/^(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2})/);
1471
+ if (m) date = `${m[1]}-${m[2]}-${m[3]}T${m[4]}:${m[5]}:00Z`;
1472
+ }
1473
+ if (!date && epoch) date = new Date(epoch * 1000).toISOString();
1474
+ if (!date) return null;
1475
+
1476
+ return {
1477
+ score,
1478
+ bpc: String(fields.bpc || ''),
1479
+ method: String(fields.method || ''),
1480
+ agent: String(fields.agent || 'unknown').trim(),
1481
+ run: String(fields.description || '').trim(),
1482
+ date,
1483
+ status,
1484
+ _source: 'results',
1485
+ _filename: filename,
1486
+ };
1487
+ }
1488
+
1489
+ // Combine baselines + legacy agent-runs from LEADERBOARD.md with new
1490
+ // agent-runs from results/*.md. Dedupe by (agent, score) so a hypothetical
1491
+ // row that exists in both places doesn't get double-counted.
1492
+ function mergeEntries(legacy, results) {
1493
+ const out = [...legacy];
1494
+ const seen = new Set(legacy.map(e => `${e.agent}|${e.score}`));
1495
+ for (const e of results) {
1496
+ const key = `${e.agent}|${e.score}`;
1497
+ if (seen.has(key)) continue;
1498
+ seen.add(key);
1499
+ out.push(e);
1500
+ }
1501
+ return out;
1502
+ }
1503
+
1504
  // ─────────────────────────────────────────────────────────────
1505
  // UTILS
1506
  // ─────────────────────────────────────────────────────────────
 
1579
  }
1580
  return parseLeaderboardMd(await r.text());
1581
  }
1582
+ async function fetchResults() {
1583
+ const r = await fetchWithTimeout(RESULTS_URL);
1584
+ if (!r.ok) {
1585
+ const e = new Error(`HTTP ${r.status}`);
1586
+ e.status = r.status;
1587
+ throw e;
1588
+ }
1589
+ const { items = [] } = await r.json();
1590
+ return items
1591
+ .map(it => parseResultFile(it.filename, it.content))
1592
+ .filter(Boolean);
1593
+ }
1594
  async function postUserMessage(handle, body, refFilename = null) {
1595
  const r = await fetchWithTimeout(MESSAGES_URL, {
1596
  method: 'POST',
 
1834
  ranked.forEach((e, i) => {
1835
  const rank = i + 1;
1836
  const isBest = rank === 1;
1837
+ const isNeg = e.status === 'negative';
1838
  const tr = document.createElement('tr');
1839
  if (isBest) tr.classList.add('best-row');
1840
+ if (isNeg) tr.style.opacity = '0.7';
1841
+ const symbol = isNeg
1842
+ ? '<span class="rank-badge rank-badge--default" style="background:#f3f4f6;color:#9ca3af">—</span>'
1843
+ : (rank === 1 ? '🥇' : rank === 2 ? '🥈' : rank === 3 ? '🥉' : `<span class="rank-badge rank-badge--default">${rank}</span>`);
1844
+ const negTag = isNeg
1845
+ ? ' <span style="display:inline-block;padding:1px 7px;margin-left:6px;font-size:10px;font-weight:700;letter-spacing:0.04em;text-transform:uppercase;background:#f3f4f6;color:#6b7280;border:1px solid #e5e7eb;border-radius:999px;vertical-align:1px">negative</span>'
1846
+ : '';
1847
  const d = new Date(e.date);
1848
  const dateStr = d.toLocaleDateString('en-US', { month: 'short', day: 'numeric' }) + ', ' +
1849
  d.toLocaleTimeString('en-US', { hour: '2-digit', minute: '2-digit', hour12: false });
 
1852
  <td class="rank-cell"><span class="rank-badge">${symbol}</span></td>
1853
  <td class="score-cell ${isBest ? 'score-cell--best' : ''}">${e.score.toLocaleString()}</td>
1854
  <td>${escapeHtml(e.bpc || '')}</td>
1855
+ <td>${escapeHtml(e.method || '')}${negTag}</td>
1856
  <td><span class="agent-tag ${isBest ? 'agent-tag--record' : ''}">${escapeHtml(e.agent)}</span></td>
1857
  <td class="run-cell">${escapeHtml(e.run)}</td>
1858
  <td class="date-cell">${dateStr}${liveBadge}</td>
 
1882
  // Baselines are fixed historical references, not events on this collab's
1883
  // timeline. Render them as horizontal dashed lines, not as points that
1884
  // contribute to the running-best curve.
1885
+ const isBaseline = e => e.status === 'baseline' || e.agent === 'baseline';
1886
+ const isNegative = e => e.status === 'negative';
1887
+ const runEntries = entries.filter(e => !isBaseline(e) && !isNegative(e));
1888
+ const negativeEntries = entries.filter(e => isNegative(e));
1889
  const baselineEntries = [...entries]
1890
+ .filter(isBaseline)
1891
  .sort((a, b) => a.score - b.score);
1892
 
1893
  const sorted = [...runEntries].sort((a, b) => new Date(a.date) - new Date(b.date));
 
1914
  }
1915
  const bestScatter = bestEntries.map(e => ({ x: new Date(e.date).getTime(), y: e.score, agent: e.agent }));
1916
  const nonBestData = nonBestEntries.map(e => ({ x: new Date(e.date).getTime(), y: e.score, agent: e.agent }));
1917
+ // Negatives: clamp dates that may sit outside the run range (e.g. an early
1918
+ // negative posted before the first agent-run) into [xMin, extendedEnd] so
1919
+ // the dot is visible. Original date is preserved in `_origDate` for the
1920
+ // tooltip so the hover-info still shows the real timestamp.
1921
+ const negativeData = negativeEntries.map(e => {
1922
+ const t = new Date(e.date).getTime();
1923
+ const clamped = Math.max(xMin, Math.min(extendedEnd, t));
1924
+ return { x: clamped, y: e.score, agent: e.agent, _origDate: e.date, _negative: true };
1925
+ });
1926
 
1927
+ // Y axis covers runs, negatives, *and* baselines so nothing is clipped.
1928
+ const allScores = [
1929
+ ...sorted.map(e => e.score),
1930
+ ...negativeEntries.map(e => e.score),
1931
+ ...baselineEntries.map(e => e.score),
1932
+ ];
1933
  const minScore = allScores.length ? Math.min(...allScores) : 14_000_000;
1934
  const maxScore = allScores.length ? Math.max(...allScores) : 25_000_000;
1935
  const scorePad = (maxScore - minScore) * 0.2 || 100;
 
2025
  { label: 'Running Best', data: bestLineData, borderColor: HF_ORANGE, backgroundColor: HF_ORANGE_DIM, borderWidth: 2.5, stepped: 'after', fill: true, pointRadius: 0, pointHoverRadius: 0, tension: 0, order: 2 },
2026
  { label: 'Records', data: bestScatter, type: 'scatter', backgroundColor: HF_ORANGE, borderColor: '#fff', borderWidth: 2, pointRadius: 7, pointHoverRadius: 9, pointStyle: 'circle', order: 1 },
2027
  { label: 'Non-Records', data: nonBestData, type: 'scatter', backgroundColor: NON_BEST_COLOR, borderColor: '#fff', borderWidth: 1.5, pointRadius: 5, pointHoverRadius: 7, pointStyle: 'circle', order: 0 },
2028
+ // Negatives: smaller, slightly transparent grey dots, no permanent
2029
+ // label. Tooltip carries the identifying info on hover.
2030
+ { label: 'Negatives', data: negativeData, type: 'scatter', backgroundColor: 'rgba(156,163,175,0.55)', borderColor: '#fff', borderWidth: 1, pointRadius: 4, pointHoverRadius: 6, pointStyle: 'circle', order: -1 },
2031
  ...baselineDatasets,
2032
  ],
2033
  },
 
2044
  bodyFont: { family: "'JetBrains Mono', monospace", size: 11 },
2045
  titleColor: '#fff', bodyColor: '#d1d5db',
2046
  // Run datasets (idx 0..2): only real points, skip line-extension synthetic point.
2047
+ // Negative scatter (idx 3): always allowed.
2048
+ // Baseline datasets (idx >= 4): always allowed (hover line → identify it).
2049
  filter: it => {
2050
  if (it.datasetIndex >= 3) return true;
2051
  return it.raw && !it.raw._ext && it.raw.agent;
 
2053
  callbacks: {
2054
  title: items => {
2055
  const it = items[0];
2056
+ if (it.datasetIndex >= 4) return `baseline · ${it.dataset.label}`;
2057
+ if (it.datasetIndex === 3) return `negative · ${it.raw?.agent || ''}`;
2058
  return it.raw?.agent || '';
2059
  },
2060
  label: it => {
2061
+ if (it.datasetIndex >= 4) {
2062
+ return [`Bytes: ${it.raw.y.toLocaleString()}`];
2063
+ }
2064
+ if (it.datasetIndex === 3) {
2065
+ const d = it.raw._origDate ? new Date(it.raw._origDate) : new Date(it.raw.x);
2066
+ return [`Bytes: ${it.raw.y.toLocaleString()}`, `Date: ${d.toLocaleString()}`];
2067
+ }
2068
  const d = new Date(it.raw.x);
2069
  return [`Bytes: ${it.raw.y.toLocaleString()}`, `Date: ${d.toLocaleString()}`];
2070
  }
 
2144
  if (refreshing) return { skipped: true };
2145
  refreshing = true;
2146
  try {
2147
+ // Run all three in parallel
2148
+ const [freshMsgs, freshLb, freshResults] = await Promise.allSettled([
2149
  fetchAllMessages(),
2150
  fetchLeaderboard(),
2151
+ fetchResults(),
2152
  ]);
2153
 
2154
  let added = 0;
 
2168
  }
2169
  }
2170
  }
2171
+ // Merge legacy LEADERBOARD.md rows + new results/*.md files. If results
2172
+ // failed but leaderboard succeeded (or vice-versa) fall back to whichever
2173
+ // succeeded so a single failing source doesn't blank the chart.
2174
+ let mergedLb = null;
2175
+ if (freshLb.status === 'fulfilled' && freshResults.status === 'fulfilled') {
2176
+ mergedLb = mergeEntries(freshLb.value, freshResults.value);
2177
+ } else if (freshLb.status === 'fulfilled') {
2178
+ mergedLb = freshLb.value;
2179
+ console.warn('Results refresh failed:', freshResults.reason);
2180
+ } else if (freshResults.status === 'fulfilled') {
2181
+ mergedLb = freshResults.value;
2182
  console.warn('Leaderboard refresh failed:', freshLb.reason);
2183
  }
2184
+ if (mergedLb) {
2185
+ renderLeaderboard(mergedLb);
2186
+ lbStatus.textContent = `Live · ${mergedLb.length} entries`;
2187
+ } else {
2188
+ console.warn('Both leaderboard and results refresh failed.');
2189
+ }
2190
 
2191
+ if (freshMsgs.status === 'fulfilled' && mergedLb) {
2192
+ writeCache(freshMsgs.value, mergedLb);
2193
  setLiveStatus(true, 'Live');
2194
  } else if (freshMsgs.status === 'fulfilled') {
2195
  writeCache(freshMsgs.value, leaderboardEntries);
 
2364
 
2365
  // Background refresh
2366
  try {
2367
+ const [freshMsgs, freshLb, freshResults] = await Promise.allSettled([
2368
  fetchAllMessages(setLoadingProgress),
2369
  fetchLeaderboard(),
2370
+ fetchResults(),
2371
  ]);
2372
  if (freshMsgs.status === 'fulfilled') {
2373
  const fresh = freshMsgs.value;
 
2393
  else showFetchError(e);
2394
  }
2395
 
2396
+ let mergedLb = null;
2397
+ if (freshLb.status === 'fulfilled' && freshResults.status === 'fulfilled') {
2398
+ mergedLb = mergeEntries(freshLb.value, freshResults.value);
2399
+ } else if (freshLb.status === 'fulfilled') {
2400
+ mergedLb = freshLb.value;
2401
+ } else if (freshResults.status === 'fulfilled') {
2402
+ mergedLb = freshResults.value;
2403
+ }
2404
+ if (mergedLb) {
2405
+ renderLeaderboard(mergedLb);
2406
+ lbStatus.textContent = `Live · ${mergedLb.length} entries`;
2407
  } else if (!painted) {
2408
+ lbStatus.textContent = 'Failed: ' + (freshLb.reason?.message || freshResults.reason?.message || 'unknown');
2409
  }
2410
 
2411
+ if (freshMsgs.status === 'fulfilled' && mergedLb) {
2412
+ writeCache(freshMsgs.value, mergedLb);
2413
  setLiveStatus(true, 'Live');
2414
  }
2415
  } catch (err) {