Add results/ folder ingestion + render negatives as grey dots
Browse files- app.py +32 -9
- static/index.html +169 -26
app.py
CHANGED
|
@@ -45,6 +45,7 @@ log = logging.getLogger("hutter-prize-live")
|
|
| 45 |
|
| 46 |
BUCKET = os.environ.get("BUCKET", "ml-agent-explorers/hutter-prize-collab")
|
| 47 |
PREFIX = os.environ.get("PREFIX", "message_board")
|
|
|
|
| 48 |
HUB = "https://huggingface.co"
|
| 49 |
|
| 50 |
LOCAL_BUCKET_DIR = os.environ.get("LOCAL_BUCKET_DIR")
|
|
@@ -94,18 +95,24 @@ app = FastAPI(title="Hutter Prize Live", lifespan=lifespan)
|
|
| 94 |
@app.get("/api/health")
|
| 95 |
async def health() -> dict[str, Any]:
|
| 96 |
mode = "local" if LOCAL_BUCKET_DIR else ("hub" if HF_TOKEN else "unconfigured")
|
| 97 |
-
return {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
|
| 99 |
|
| 100 |
# ──────────────────────────────────────────────────────────────
|
| 101 |
-
# /api/messages
|
| 102 |
# ──────────────────────────────────────────────────────────────
|
| 103 |
-
def
|
| 104 |
-
|
| 105 |
-
if not
|
| 106 |
return []
|
| 107 |
items: list[dict[str, str]] = []
|
| 108 |
-
for f in sorted(
|
| 109 |
if f.name.lower() == "readme.md":
|
| 110 |
continue
|
| 111 |
try:
|
|
@@ -115,12 +122,15 @@ def _messages_local() -> list[dict[str, str]]:
|
|
| 115 |
return items
|
| 116 |
|
| 117 |
|
| 118 |
-
async def
|
| 119 |
if not HF_TOKEN:
|
| 120 |
raise HTTPException(401, "Server is not configured: set HF_TOKEN.")
|
| 121 |
client: httpx.AsyncClient = app.state.client
|
| 122 |
|
| 123 |
-
tree_resp = await client.get(f"{HUB}/api/buckets/{BUCKET}/tree/{
|
|
|
|
|
|
|
|
|
|
| 124 |
if tree_resp.status_code == 401:
|
| 125 |
raise HTTPException(401, "HF_TOKEN lacks access to this bucket.")
|
| 126 |
if not tree_resp.is_success:
|
|
@@ -149,9 +159,22 @@ async def _messages_hub() -> list[dict[str, str]]:
|
|
| 149 |
return [r for r in results if r is not None]
|
| 150 |
|
| 151 |
|
|
|
|
|
|
|
|
|
|
| 152 |
@app.get("/api/messages")
|
| 153 |
async def messages() -> dict[str, Any]:
|
| 154 |
-
items =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
return {"items": items, "count": len(items)}
|
| 156 |
|
| 157 |
|
|
|
|
| 45 |
|
| 46 |
BUCKET = os.environ.get("BUCKET", "ml-agent-explorers/hutter-prize-collab")
|
| 47 |
PREFIX = os.environ.get("PREFIX", "message_board")
|
| 48 |
+
RESULTS_PREFIX = os.environ.get("RESULTS_PREFIX", "results")
|
| 49 |
HUB = "https://huggingface.co"
|
| 50 |
|
| 51 |
LOCAL_BUCKET_DIR = os.environ.get("LOCAL_BUCKET_DIR")
|
|
|
|
| 95 |
@app.get("/api/health")
async def health() -> dict[str, Any]:
    """Report liveness together with the storage mode and bucket settings.

    The mode is "local" when LOCAL_BUCKET_DIR is set, otherwise "hub" when
    HF_TOKEN is set, otherwise "unconfigured".
    """
    if LOCAL_BUCKET_DIR:
        mode = "local"
    elif HF_TOKEN:
        mode = "hub"
    else:
        mode = "unconfigured"
    return dict(
        ok=True,
        mode=mode,
        bucket=BUCKET,
        prefix=PREFIX,
        results_prefix=RESULTS_PREFIX,
    )
|
| 105 |
|
| 106 |
|
| 107 |
# ──────────────────────────────────────────────────────────────
|
| 108 |
+
# Shared listing helpers (used by /api/messages and /api/results)
|
| 109 |
# ──────────────────────────────────────────────────────────────
|
| 110 |
+
def _list_md_local(prefix: str) -> list[dict[str, str]]:
|
| 111 |
+
folder = Path(LOCAL_BUCKET_DIR) / prefix
|
| 112 |
+
if not folder.is_dir():
|
| 113 |
return []
|
| 114 |
items: list[dict[str, str]] = []
|
| 115 |
+
for f in sorted(folder.glob("*.md")):
|
| 116 |
if f.name.lower() == "readme.md":
|
| 117 |
continue
|
| 118 |
try:
|
|
|
|
| 122 |
return items
|
| 123 |
|
| 124 |
|
| 125 |
+
async def _list_md_hub(prefix: str) -> list[dict[str, str]]:
|
| 126 |
if not HF_TOKEN:
|
| 127 |
raise HTTPException(401, "Server is not configured: set HF_TOKEN.")
|
| 128 |
client: httpx.AsyncClient = app.state.client
|
| 129 |
|
| 130 |
+
tree_resp = await client.get(f"{HUB}/api/buckets/{BUCKET}/tree/{prefix}")
|
| 131 |
+
if tree_resp.status_code == 404:
|
| 132 |
+
# Folder may not exist yet (e.g. fresh `results/` before any agent posts).
|
| 133 |
+
return []
|
| 134 |
if tree_resp.status_code == 401:
|
| 135 |
raise HTTPException(401, "HF_TOKEN lacks access to this bucket.")
|
| 136 |
if not tree_resp.is_success:
|
|
|
|
| 159 |
return [r for r in results if r is not None]
|
| 160 |
|
| 161 |
|
| 162 |
+
# ──────────────────────────────────────────────────────────────
|
| 163 |
+
# /api/messages and /api/results
|
| 164 |
+
# ──────────────────────────────────────────────────────────────
|
| 165 |
@app.get("/api/messages")
async def messages() -> dict[str, Any]:
    """Return the markdown items listed under PREFIX plus their count.

    Reads from the local bucket directory when LOCAL_BUCKET_DIR is set and
    from the Hub otherwise.
    """
    if LOCAL_BUCKET_DIR:
        entries = _list_md_local(PREFIX)
    else:
        entries = await _list_md_hub(PREFIX)
    return {"items": entries, "count": len(entries)}
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
@app.get("/api/results")
async def results() -> dict[str, Any]:
    """Return the markdown items listed under RESULTS_PREFIX plus their count.

    Reads from the local bucket directory when LOCAL_BUCKET_DIR is set and
    from the Hub otherwise.
    """
    if LOCAL_BUCKET_DIR:
        entries = _list_md_local(RESULTS_PREFIX)
    else:
        entries = await _list_md_hub(RESULTS_PREFIX)
    return {"items": entries, "count": len(entries)}
|
| 179 |
|
| 180 |
|
static/index.html
CHANGED
|
@@ -1165,10 +1165,11 @@ curl -sL https://huggingface.co/buckets/ml-agent-explorers/hutter-prize-collab/r
|
|
| 1165 |
// (same origin), so HF_TOKEN never reaches the browser.
|
| 1166 |
// ─────────────────────────────────────────────────────────────
|
| 1167 |
const MESSAGES_URL = '/api/messages';
|
|
|
|
| 1168 |
const LEADERBOARD_URL = '/api/leaderboard';
|
| 1169 |
const BUCKET_WEB_URL = 'https://huggingface.co/buckets/ml-agent-explorers/hutter-prize-collab';
|
| 1170 |
const POLL_MS = 30_000;
|
| 1171 |
-
const CACHE_KEY = '
|
| 1172 |
const HANDLE_KEY = 'hutter_prize_human_handle';
|
| 1173 |
const FETCH_TIMEOUT_MS = 30_000;
|
| 1174 |
const HANDLE_RE = /^[A-Za-z0-9][A-Za-z0-9_.-]{0,31}$/;
|
|
@@ -1420,13 +1421,86 @@ function parseLeaderboardMd(md) {
|
|
| 1420 |
const run = cells[4];
|
| 1421 |
let date = cells[5];
|
| 1422 |
if (date && !date.endsWith('Z') && !date.includes('+')) date += 'Z';
|
| 1423 |
-
if (!isNaN(score) && agent && date)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1424 |
}
|
| 1425 |
}
|
| 1426 |
}
|
| 1427 |
return entries;
|
| 1428 |
}
|
| 1429 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1430 |
// ─────────────────────────────────────────────────────────────
|
| 1431 |
// UTILS
|
| 1432 |
// ─────────────────────────────────────────────────────────────
|
|
@@ -1505,6 +1579,18 @@ async function fetchLeaderboard() {
|
|
| 1505 |
}
|
| 1506 |
return parseLeaderboardMd(await r.text());
|
| 1507 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1508 |
async function postUserMessage(handle, body, refFilename = null) {
|
| 1509 |
const r = await fetchWithTimeout(MESSAGES_URL, {
|
| 1510 |
method: 'POST',
|
|
@@ -1748,9 +1834,16 @@ function renderLeaderboard(entries) {
|
|
| 1748 |
ranked.forEach((e, i) => {
|
| 1749 |
const rank = i + 1;
|
| 1750 |
const isBest = rank === 1;
|
|
|
|
| 1751 |
const tr = document.createElement('tr');
|
| 1752 |
if (isBest) tr.classList.add('best-row');
|
| 1753 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1754 |
const d = new Date(e.date);
|
| 1755 |
const dateStr = d.toLocaleDateString('en-US', { month: 'short', day: 'numeric' }) + ', ' +
|
| 1756 |
d.toLocaleTimeString('en-US', { hour: '2-digit', minute: '2-digit', hour12: false });
|
|
@@ -1759,7 +1852,7 @@ function renderLeaderboard(entries) {
|
|
| 1759 |
<td class="rank-cell"><span class="rank-badge">${symbol}</span></td>
|
| 1760 |
<td class="score-cell ${isBest ? 'score-cell--best' : ''}">${e.score.toLocaleString()}</td>
|
| 1761 |
<td>${escapeHtml(e.bpc || '')}</td>
|
| 1762 |
-
<td>${escapeHtml(e.method || '')}</td>
|
| 1763 |
<td><span class="agent-tag ${isBest ? 'agent-tag--record' : ''}">${escapeHtml(e.agent)}</span></td>
|
| 1764 |
<td class="run-cell">${escapeHtml(e.run)}</td>
|
| 1765 |
<td class="date-cell">${dateStr}${liveBadge}</td>
|
|
@@ -1789,9 +1882,12 @@ function renderChart(entries) {
|
|
| 1789 |
// Baselines are fixed historical references, not events on this collab's
|
| 1790 |
// timeline. Render them as horizontal dashed lines, not as points that
|
| 1791 |
// contribute to the running-best curve.
|
| 1792 |
-
const
|
|
|
|
|
|
|
|
|
|
| 1793 |
const baselineEntries = [...entries]
|
| 1794 |
-
.filter(
|
| 1795 |
.sort((a, b) => a.score - b.score);
|
| 1796 |
|
| 1797 |
const sorted = [...runEntries].sort((a, b) => new Date(a.date) - new Date(b.date));
|
|
@@ -1818,9 +1914,22 @@ function renderChart(entries) {
|
|
| 1818 |
}
|
| 1819 |
const bestScatter = bestEntries.map(e => ({ x: new Date(e.date).getTime(), y: e.score, agent: e.agent }));
|
| 1820 |
const nonBestData = nonBestEntries.map(e => ({ x: new Date(e.date).getTime(), y: e.score, agent: e.agent }));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1821 |
|
| 1822 |
-
// Y axis covers runs *and* baselines so
|
| 1823 |
-
const allScores = [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1824 |
const minScore = allScores.length ? Math.min(...allScores) : 14_000_000;
|
| 1825 |
const maxScore = allScores.length ? Math.max(...allScores) : 25_000_000;
|
| 1826 |
const scorePad = (maxScore - minScore) * 0.2 || 100;
|
|
@@ -1916,6 +2025,9 @@ function renderChart(entries) {
|
|
| 1916 |
{ label: 'Running Best', data: bestLineData, borderColor: HF_ORANGE, backgroundColor: HF_ORANGE_DIM, borderWidth: 2.5, stepped: 'after', fill: true, pointRadius: 0, pointHoverRadius: 0, tension: 0, order: 2 },
|
| 1917 |
{ label: 'Records', data: bestScatter, type: 'scatter', backgroundColor: HF_ORANGE, borderColor: '#fff', borderWidth: 2, pointRadius: 7, pointHoverRadius: 9, pointStyle: 'circle', order: 1 },
|
| 1918 |
{ label: 'Non-Records', data: nonBestData, type: 'scatter', backgroundColor: NON_BEST_COLOR, borderColor: '#fff', borderWidth: 1.5, pointRadius: 5, pointHoverRadius: 7, pointStyle: 'circle', order: 0 },
|
|
|
|
|
|
|
|
|
|
| 1919 |
...baselineDatasets,
|
| 1920 |
],
|
| 1921 |
},
|
|
@@ -1932,7 +2044,8 @@ function renderChart(entries) {
|
|
| 1932 |
bodyFont: { family: "'JetBrains Mono', monospace", size: 11 },
|
| 1933 |
titleColor: '#fff', bodyColor: '#d1d5db',
|
| 1934 |
// Run datasets (idx 0..2): only real points, skip line-extension synthetic point.
|
| 1935 |
-
//
|
|
|
|
| 1936 |
filter: it => {
|
| 1937 |
if (it.datasetIndex >= 3) return true;
|
| 1938 |
return it.raw && !it.raw._ext && it.raw.agent;
|
|
@@ -1940,11 +2053,18 @@ function renderChart(entries) {
|
|
| 1940 |
callbacks: {
|
| 1941 |
title: items => {
|
| 1942 |
const it = items[0];
|
| 1943 |
-
if (it.datasetIndex >=
|
|
|
|
| 1944 |
return it.raw?.agent || '';
|
| 1945 |
},
|
| 1946 |
label: it => {
|
| 1947 |
-
if (it.datasetIndex >=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1948 |
const d = new Date(it.raw.x);
|
| 1949 |
return [`Bytes: ${it.raw.y.toLocaleString()}`, `Date: ${d.toLocaleString()}`];
|
| 1950 |
}
|
|
@@ -2024,10 +2144,11 @@ async function refreshAll() {
|
|
| 2024 |
if (refreshing) return { skipped: true };
|
| 2025 |
refreshing = true;
|
| 2026 |
try {
|
| 2027 |
-
// Run
|
| 2028 |
-
const [freshMsgs, freshLb] = await Promise.allSettled([
|
| 2029 |
fetchAllMessages(),
|
| 2030 |
fetchLeaderboard(),
|
|
|
|
| 2031 |
]);
|
| 2032 |
|
| 2033 |
let added = 0;
|
|
@@ -2047,15 +2168,28 @@ async function refreshAll() {
|
|
| 2047 |
}
|
| 2048 |
}
|
| 2049 |
}
|
| 2050 |
-
|
| 2051 |
-
|
| 2052 |
-
|
| 2053 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2054 |
console.warn('Leaderboard refresh failed:', freshLb.reason);
|
| 2055 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2056 |
|
| 2057 |
-
if (freshMsgs.status === 'fulfilled' &&
|
| 2058 |
-
writeCache(freshMsgs.value,
|
| 2059 |
setLiveStatus(true, 'Live');
|
| 2060 |
} else if (freshMsgs.status === 'fulfilled') {
|
| 2061 |
writeCache(freshMsgs.value, leaderboardEntries);
|
|
@@ -2230,9 +2364,10 @@ async function initialLoad() {
|
|
| 2230 |
|
| 2231 |
// Background refresh
|
| 2232 |
try {
|
| 2233 |
-
const [freshMsgs, freshLb] = await Promise.allSettled([
|
| 2234 |
fetchAllMessages(setLoadingProgress),
|
| 2235 |
fetchLeaderboard(),
|
|
|
|
| 2236 |
]);
|
| 2237 |
if (freshMsgs.status === 'fulfilled') {
|
| 2238 |
const fresh = freshMsgs.value;
|
|
@@ -2258,15 +2393,23 @@ async function initialLoad() {
|
|
| 2258 |
else showFetchError(e);
|
| 2259 |
}
|
| 2260 |
|
| 2261 |
-
|
| 2262 |
-
|
| 2263 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2264 |
} else if (!painted) {
|
| 2265 |
-
lbStatus.textContent = 'Failed: ' + (freshLb.reason?.message || 'unknown');
|
| 2266 |
}
|
| 2267 |
|
| 2268 |
-
if (freshMsgs.status === 'fulfilled' &&
|
| 2269 |
-
writeCache(freshMsgs.value,
|
| 2270 |
setLiveStatus(true, 'Live');
|
| 2271 |
}
|
| 2272 |
} catch (err) {
|
|
|
|
| 1165 |
// (same origin), so HF_TOKEN never reaches the browser.
|
| 1166 |
// ─────────────────────────────────────────────────────────────
|
| 1167 |
const MESSAGES_URL = '/api/messages';
|
| 1168 |
+
const RESULTS_URL = '/api/results';
|
| 1169 |
const LEADERBOARD_URL = '/api/leaderboard';
|
| 1170 |
const BUCKET_WEB_URL = 'https://huggingface.co/buckets/ml-agent-explorers/hutter-prize-collab';
|
| 1171 |
const POLL_MS = 30_000;
|
| 1172 |
+
const CACHE_KEY = 'hutter_prize_cache_v3';
|
| 1173 |
const HANDLE_KEY = 'hutter_prize_human_handle';
|
| 1174 |
const FETCH_TIMEOUT_MS = 30_000;
|
| 1175 |
const HANDLE_RE = /^[A-Za-z0-9][A-Za-z0-9_.-]{0,31}$/;
|
|
|
|
| 1421 |
const run = cells[4];
|
| 1422 |
let date = cells[5];
|
| 1423 |
if (date && !date.endsWith('Z') && !date.includes('+')) date += 'Z';
|
| 1424 |
+
if (!isNaN(score) && agent && date) {
|
| 1425 |
+
// LEADERBOARD.md only contains positive entries (baselines or
|
| 1426 |
+
// legacy agent-runs), so default status by the agent column.
|
| 1427 |
+
const status = agent === 'baseline' ? 'baseline' : 'agent-run';
|
| 1428 |
+
entries.push({ score, bpc, method, agent, run, date, status });
|
| 1429 |
+
}
|
| 1430 |
}
|
| 1431 |
}
|
| 1432 |
}
|
| 1433 |
return entries;
|
| 1434 |
}
|
| 1435 |
|
| 1436 |
+
// ─────────────────────────────────────────────────────────────
|
| 1437 |
+
// PARSING (results/*.md — frontmatter-based result files)
|
| 1438 |
+
// ─────────────────────────────────────────────────────────────
|
| 1439 |
+
//
|
| 1440 |
+
// Result files are written by `mb.sh result post`. Schema:
|
| 1441 |
+
// ---
|
| 1442 |
+
// agent: lvwerra-cc
|
| 1443 |
+
// method: zpaq-m5
|
| 1444 |
+
// bytes: 19783461
|
| 1445 |
+
// bpc: 1.583
|
| 1446 |
+
// status: agent-run # agent-run | negative
|
| 1447 |
+
// artifacts: artifacts/zpaq_lvwerra-cc/
|
| 1448 |
+
// timestamp: 2026-05-01 13:32 UTC
|
| 1449 |
+
// description: "..."
|
| 1450 |
+
// ---
|
| 1451 |
+
// {optional body}
|
| 1452 |
+
//
|
| 1453 |
+
// Returns an entry shaped like the leaderboard rows so renderLeaderboard
|
| 1454 |
+
// doesn't need to know which source the entry came from.
|
| 1455 |
+
/**
 * Parse one results/*.md file into a leaderboard-shaped entry.
 *
 * @param {string} filename  Name of the result file; used for the fallback
 *                           timestamp (via epochFromFilename) and kept on the
 *                           entry as `_filename`.
 * @param {string} raw       Full file content, frontmatter included.
 * @returns {object|null}    `{ score, bpc, method, agent, run, date, status,
 *                           _source, _filename }`, or null when the file has
 *                           no usable `bytes` value, an unrecognised status,
 *                           or no usable date.
 */
function parseResultFile(filename, raw) {
  // Default `fields` so a file without frontmatter is skipped instead of
  // throwing and aborting the caller's whole batch.
  const { fields = {} } = parseFrontmatter(raw) || {};
  if (!fields.bytes) return null;
  const score = parseInt(String(fields.bytes).replace(/[,_\s]/g, ''), 10);
  if (isNaN(score) || score < BYTES_MIN || score > BYTES_MAX) return null;

  // String() guards against non-string values; lower-casing lets
  // "Negative" / "Agent-Run" through instead of silently dropping the file.
  const status = String(fields.status || 'agent-run').trim().toLowerCase();
  // Accept agent-run (positive), baseline (rendered as horizontal line), and
  // negative (grey dot, no label, included in the table at the bottom).
  if (!['agent-run', 'baseline', 'negative'].includes(status)) return null;

  const epoch = epochFromFilename(filename);
  // Convert the message-board timestamp ("2026-05-01 13:32 UTC") or fall back
  // to the filename-derived epoch as the chart's x-coordinate.
  let date;
  if (fields.timestamp) {
    const m = String(fields.timestamp).match(/^(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2})/);
    if (m) {
      const iso = `${m[1]}-${m[2]}-${m[3]}T${m[4]}:${m[5]}:00Z`;
      // The regex only checks digit counts; reject strings such as month 13
      // or hour 99 that would otherwise become an Invalid Date downstream.
      if (!Number.isNaN(Date.parse(iso))) date = iso;
    }
  }
  if (!date && epoch) {
    const fromName = new Date(epoch * 1000);
    // toISOString() throws a RangeError on an Invalid Date, so check first.
    if (!Number.isNaN(fromName.getTime())) date = fromName.toISOString();
  }
  if (!date) return null;

  return {
    score,
    bpc: String(fields.bpc || ''),
    method: String(fields.method || ''),
    // Trim before defaulting so a whitespace-only agent becomes 'unknown'.
    agent: String(fields.agent || '').trim() || 'unknown',
    run: String(fields.description || '').trim(),
    date,
    status,
    _source: 'results',
    _filename: filename,
  };
}
|
| 1488 |
+
|
| 1489 |
+
// Combine baselines + legacy agent-runs from LEADERBOARD.md with new
|
| 1490 |
+
// agent-runs from results/*.md. Dedupe by (agent, score) so a hypothetical
|
| 1491 |
+
// row that exists in both places doesn't get double-counted.
|
| 1492 |
+
/**
 * Merge LEADERBOARD.md rows with results/*.md entries.
 *
 * Legacy rows are kept in their original order; a result entry is appended
 * only if no earlier entry (legacy or result) has the same agent and score.
 *
 * @param {Array<object>|null|undefined} legacy   Entries parsed from LEADERBOARD.md.
 * @param {Array<object>|null|undefined} results  Entries parsed from results/*.md.
 * @returns {Array<object>} New array; neither input is mutated.
 */
function mergeEntries(legacy, results) {
  const keyOf = e => `${e.agent}|${e.score}`;
  // Treat a missing list as empty so one absent source cannot throw.
  const out = [...(legacy || [])];
  const seen = new Set(out.map(keyOf));
  for (const e of results || []) {
    const key = keyOf(e);
    if (seen.has(key)) continue;
    seen.add(key);
    out.push(e);
  }
  return out;
}
|
| 1503 |
+
|
| 1504 |
// ─────────────────────────────────────────────────────────────
|
| 1505 |
// UTILS
|
| 1506 |
// ─────────────────────────────────────────────────────────────
|
|
|
|
| 1579 |
}
|
| 1580 |
return parseLeaderboardMd(await r.text());
|
| 1581 |
}
|
| 1582 |
+
/**
 * Fetch RESULTS_URL and parse each returned file into a leaderboard entry.
 *
 * @returns {Promise<Array<object>>} Entries for every file parseResultFile
 *          accepted; files it rejects (returns null for) or throws on are
 *          skipped.
 * @throws {Error} With a `status` property when the response is not OK.
 */
async function fetchResults() {
  const r = await fetchWithTimeout(RESULTS_URL);
  if (!r.ok) {
    const e = new Error(`HTTP ${r.status}`);
    e.status = r.status;
    throw e;
  }
  const payload = await r.json();
  // A destructuring default only covers `undefined`; guard against
  // `items: null` or any other non-array payload as well.
  const items = Array.isArray(payload?.items) ? payload.items : [];
  const entries = [];
  for (const it of items) {
    try {
      const entry = parseResultFile(it.filename, it.content);
      if (entry) entries.push(entry);
    } catch (err) {
      // One malformed file must not discard every other result.
      console.warn('Skipping unparseable result file:', it?.filename, err);
    }
  }
  return entries;
}
|
| 1594 |
async function postUserMessage(handle, body, refFilename = null) {
|
| 1595 |
const r = await fetchWithTimeout(MESSAGES_URL, {
|
| 1596 |
method: 'POST',
|
|
|
|
| 1834 |
ranked.forEach((e, i) => {
|
| 1835 |
const rank = i + 1;
|
| 1836 |
const isBest = rank === 1;
|
| 1837 |
+
const isNeg = e.status === 'negative';
|
| 1838 |
const tr = document.createElement('tr');
|
| 1839 |
if (isBest) tr.classList.add('best-row');
|
| 1840 |
+
if (isNeg) tr.style.opacity = '0.7';
|
| 1841 |
+
const symbol = isNeg
|
| 1842 |
+
? '<span class="rank-badge rank-badge--default" style="background:#f3f4f6;color:#9ca3af">—</span>'
|
| 1843 |
+
: (rank === 1 ? '🥇' : rank === 2 ? '🥈' : rank === 3 ? '🥉' : `<span class="rank-badge rank-badge--default">${rank}</span>`);
|
| 1844 |
+
const negTag = isNeg
|
| 1845 |
+
? ' <span style="display:inline-block;padding:1px 7px;margin-left:6px;font-size:10px;font-weight:700;letter-spacing:0.04em;text-transform:uppercase;background:#f3f4f6;color:#6b7280;border:1px solid #e5e7eb;border-radius:999px;vertical-align:1px">negative</span>'
|
| 1846 |
+
: '';
|
| 1847 |
const d = new Date(e.date);
|
| 1848 |
const dateStr = d.toLocaleDateString('en-US', { month: 'short', day: 'numeric' }) + ', ' +
|
| 1849 |
d.toLocaleTimeString('en-US', { hour: '2-digit', minute: '2-digit', hour12: false });
|
|
|
|
| 1852 |
<td class="rank-cell"><span class="rank-badge">${symbol}</span></td>
|
| 1853 |
<td class="score-cell ${isBest ? 'score-cell--best' : ''}">${e.score.toLocaleString()}</td>
|
| 1854 |
<td>${escapeHtml(e.bpc || '')}</td>
|
| 1855 |
+
<td>${escapeHtml(e.method || '')}${negTag}</td>
|
| 1856 |
<td><span class="agent-tag ${isBest ? 'agent-tag--record' : ''}">${escapeHtml(e.agent)}</span></td>
|
| 1857 |
<td class="run-cell">${escapeHtml(e.run)}</td>
|
| 1858 |
<td class="date-cell">${dateStr}${liveBadge}</td>
|
|
|
|
| 1882 |
// Baselines are fixed historical references, not events on this collab's
|
| 1883 |
// timeline. Render them as horizontal dashed lines, not as points that
|
| 1884 |
// contribute to the running-best curve.
|
| 1885 |
+
const isBaseline = e => e.status === 'baseline' || e.agent === 'baseline';
|
| 1886 |
+
const isNegative = e => e.status === 'negative';
|
| 1887 |
+
const runEntries = entries.filter(e => !isBaseline(e) && !isNegative(e));
|
| 1888 |
+
const negativeEntries = entries.filter(e => isNegative(e));
|
| 1889 |
const baselineEntries = [...entries]
|
| 1890 |
+
.filter(isBaseline)
|
| 1891 |
.sort((a, b) => a.score - b.score);
|
| 1892 |
|
| 1893 |
const sorted = [...runEntries].sort((a, b) => new Date(a.date) - new Date(b.date));
|
|
|
|
| 1914 |
}
|
| 1915 |
const bestScatter = bestEntries.map(e => ({ x: new Date(e.date).getTime(), y: e.score, agent: e.agent }));
|
| 1916 |
const nonBestData = nonBestEntries.map(e => ({ x: new Date(e.date).getTime(), y: e.score, agent: e.agent }));
|
| 1917 |
+
// Negatives: clamp dates that may sit outside the run range (e.g. an early
|
| 1918 |
+
// negative posted before the first agent-run) into [xMin, extendedEnd] so
|
| 1919 |
+
// the dot is visible. Original date is preserved in `_origDate` for the
|
| 1920 |
+
// tooltip so the hover-info still shows the real timestamp.
|
| 1921 |
+
const negativeData = negativeEntries.map(e => {
|
| 1922 |
+
const t = new Date(e.date).getTime();
|
| 1923 |
+
const clamped = Math.max(xMin, Math.min(extendedEnd, t));
|
| 1924 |
+
return { x: clamped, y: e.score, agent: e.agent, _origDate: e.date, _negative: true };
|
| 1925 |
+
});
|
| 1926 |
|
| 1927 |
+
// Y axis covers runs, negatives, *and* baselines so nothing is clipped.
|
| 1928 |
+
const allScores = [
|
| 1929 |
+
...sorted.map(e => e.score),
|
| 1930 |
+
...negativeEntries.map(e => e.score),
|
| 1931 |
+
...baselineEntries.map(e => e.score),
|
| 1932 |
+
];
|
| 1933 |
const minScore = allScores.length ? Math.min(...allScores) : 14_000_000;
|
| 1934 |
const maxScore = allScores.length ? Math.max(...allScores) : 25_000_000;
|
| 1935 |
const scorePad = (maxScore - minScore) * 0.2 || 100;
|
|
|
|
| 2025 |
{ label: 'Running Best', data: bestLineData, borderColor: HF_ORANGE, backgroundColor: HF_ORANGE_DIM, borderWidth: 2.5, stepped: 'after', fill: true, pointRadius: 0, pointHoverRadius: 0, tension: 0, order: 2 },
|
| 2026 |
{ label: 'Records', data: bestScatter, type: 'scatter', backgroundColor: HF_ORANGE, borderColor: '#fff', borderWidth: 2, pointRadius: 7, pointHoverRadius: 9, pointStyle: 'circle', order: 1 },
|
| 2027 |
{ label: 'Non-Records', data: nonBestData, type: 'scatter', backgroundColor: NON_BEST_COLOR, borderColor: '#fff', borderWidth: 1.5, pointRadius: 5, pointHoverRadius: 7, pointStyle: 'circle', order: 0 },
|
| 2028 |
+
// Negatives: smaller, slightly transparent grey dots, no permanent
|
| 2029 |
+
// label. Tooltip carries the identifying info on hover.
|
| 2030 |
+
{ label: 'Negatives', data: negativeData, type: 'scatter', backgroundColor: 'rgba(156,163,175,0.55)', borderColor: '#fff', borderWidth: 1, pointRadius: 4, pointHoverRadius: 6, pointStyle: 'circle', order: -1 },
|
| 2031 |
...baselineDatasets,
|
| 2032 |
],
|
| 2033 |
},
|
|
|
|
| 2044 |
bodyFont: { family: "'JetBrains Mono', monospace", size: 11 },
|
| 2045 |
titleColor: '#fff', bodyColor: '#d1d5db',
|
| 2046 |
// Run datasets (idx 0..2): only real points, skip line-extension synthetic point.
|
| 2047 |
+
// Negative scatter (idx 3): always allowed.
|
| 2048 |
+
// Baseline datasets (idx >= 4): always allowed (hover line → identify it).
|
| 2049 |
filter: it => {
|
| 2050 |
if (it.datasetIndex >= 3) return true;
|
| 2051 |
return it.raw && !it.raw._ext && it.raw.agent;
|
|
|
|
| 2053 |
callbacks: {
|
| 2054 |
title: items => {
|
| 2055 |
const it = items[0];
|
| 2056 |
+
if (it.datasetIndex >= 4) return `baseline · ${it.dataset.label}`;
|
| 2057 |
+
if (it.datasetIndex === 3) return `negative · ${it.raw?.agent || ''}`;
|
| 2058 |
return it.raw?.agent || '';
|
| 2059 |
},
|
| 2060 |
label: it => {
|
| 2061 |
+
if (it.datasetIndex >= 4) {
|
| 2062 |
+
return [`Bytes: ${it.raw.y.toLocaleString()}`];
|
| 2063 |
+
}
|
| 2064 |
+
if (it.datasetIndex === 3) {
|
| 2065 |
+
const d = it.raw._origDate ? new Date(it.raw._origDate) : new Date(it.raw.x);
|
| 2066 |
+
return [`Bytes: ${it.raw.y.toLocaleString()}`, `Date: ${d.toLocaleString()}`];
|
| 2067 |
+
}
|
| 2068 |
const d = new Date(it.raw.x);
|
| 2069 |
return [`Bytes: ${it.raw.y.toLocaleString()}`, `Date: ${d.toLocaleString()}`];
|
| 2070 |
}
|
|
|
|
| 2144 |
if (refreshing) return { skipped: true };
|
| 2145 |
refreshing = true;
|
| 2146 |
try {
|
| 2147 |
+
// Run all three in parallel
|
| 2148 |
+
const [freshMsgs, freshLb, freshResults] = await Promise.allSettled([
|
| 2149 |
fetchAllMessages(),
|
| 2150 |
fetchLeaderboard(),
|
| 2151 |
+
fetchResults(),
|
| 2152 |
]);
|
| 2153 |
|
| 2154 |
let added = 0;
|
|
|
|
| 2168 |
}
|
| 2169 |
}
|
| 2170 |
}
|
| 2171 |
+
// Merge legacy LEADERBOARD.md rows + new results/*.md files. If results
|
| 2172 |
+
// failed but leaderboard succeeded (or vice-versa) fall back to whichever
|
| 2173 |
+
// succeeded so a single failing source doesn't blank the chart.
|
| 2174 |
+
let mergedLb = null;
|
| 2175 |
+
if (freshLb.status === 'fulfilled' && freshResults.status === 'fulfilled') {
|
| 2176 |
+
mergedLb = mergeEntries(freshLb.value, freshResults.value);
|
| 2177 |
+
} else if (freshLb.status === 'fulfilled') {
|
| 2178 |
+
mergedLb = freshLb.value;
|
| 2179 |
+
console.warn('Results refresh failed:', freshResults.reason);
|
| 2180 |
+
} else if (freshResults.status === 'fulfilled') {
|
| 2181 |
+
mergedLb = freshResults.value;
|
| 2182 |
console.warn('Leaderboard refresh failed:', freshLb.reason);
|
| 2183 |
}
|
| 2184 |
+
if (mergedLb) {
|
| 2185 |
+
renderLeaderboard(mergedLb);
|
| 2186 |
+
lbStatus.textContent = `Live · ${mergedLb.length} entries`;
|
| 2187 |
+
} else {
|
| 2188 |
+
console.warn('Both leaderboard and results refresh failed.');
|
| 2189 |
+
}
|
| 2190 |
|
| 2191 |
+
if (freshMsgs.status === 'fulfilled' && mergedLb) {
|
| 2192 |
+
writeCache(freshMsgs.value, mergedLb);
|
| 2193 |
setLiveStatus(true, 'Live');
|
| 2194 |
} else if (freshMsgs.status === 'fulfilled') {
|
| 2195 |
writeCache(freshMsgs.value, leaderboardEntries);
|
|
|
|
| 2364 |
|
| 2365 |
// Background refresh
|
| 2366 |
try {
|
| 2367 |
+
const [freshMsgs, freshLb, freshResults] = await Promise.allSettled([
|
| 2368 |
fetchAllMessages(setLoadingProgress),
|
| 2369 |
fetchLeaderboard(),
|
| 2370 |
+
fetchResults(),
|
| 2371 |
]);
|
| 2372 |
if (freshMsgs.status === 'fulfilled') {
|
| 2373 |
const fresh = freshMsgs.value;
|
|
|
|
| 2393 |
else showFetchError(e);
|
| 2394 |
}
|
| 2395 |
|
| 2396 |
+
let mergedLb = null;
|
| 2397 |
+
if (freshLb.status === 'fulfilled' && freshResults.status === 'fulfilled') {
|
| 2398 |
+
mergedLb = mergeEntries(freshLb.value, freshResults.value);
|
| 2399 |
+
} else if (freshLb.status === 'fulfilled') {
|
| 2400 |
+
mergedLb = freshLb.value;
|
| 2401 |
+
} else if (freshResults.status === 'fulfilled') {
|
| 2402 |
+
mergedLb = freshResults.value;
|
| 2403 |
+
}
|
| 2404 |
+
if (mergedLb) {
|
| 2405 |
+
renderLeaderboard(mergedLb);
|
| 2406 |
+
lbStatus.textContent = `Live · ${mergedLb.length} entries`;
|
| 2407 |
} else if (!painted) {
|
| 2408 |
+
lbStatus.textContent = 'Failed: ' + (freshLb.reason?.message || freshResults.reason?.message || 'unknown');
|
| 2409 |
}
|
| 2410 |
|
| 2411 |
+
if (freshMsgs.status === 'fulfilled' && mergedLb) {
|
| 2412 |
+
writeCache(freshMsgs.value, mergedLb);
|
| 2413 |
setLiveStatus(true, 'Live');
|
| 2414 |
}
|
| 2415 |
} catch (err) {
|