Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
fix details
Browse files
Fix issue with the details of models and benchmarks; modify the details included for each sample
- backend/data_loader.py +20 -4
- frontend/leaderboard.html +38 -19
backend/data_loader.py
CHANGED
|
@@ -7,6 +7,7 @@ import io
|
|
| 7 |
import logging
|
| 8 |
import re
|
| 9 |
import ast
|
|
|
|
| 10 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 11 |
from pathlib import Path
|
| 12 |
from typing import Dict, List, Any, Optional
|
|
@@ -573,11 +574,11 @@ def load_benchmark_details(
|
|
| 573 |
|
| 574 |
selected_entries.sort(key=lambda x: x[0].lower())
|
| 575 |
|
| 576 |
-
|
| 577 |
subtasks_summary: List[Dict[str, Any]] = []
|
| 578 |
for subtask, info in selected_entries:
|
| 579 |
rows = _read_detail_parquet(info["path"], subtask)
|
| 580 |
-
|
| 581 |
|
| 582 |
valid = [r for r in rows if isinstance(r.get("is_correct"), bool)]
|
| 583 |
correct = sum(1 for r in valid if r["is_correct"])
|
|
@@ -591,8 +592,23 @@ def load_benchmark_details(
|
|
| 591 |
"accuracy": accuracy,
|
| 592 |
})
|
| 593 |
|
| 594 |
-
|
| 595 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 596 |
|
| 597 |
return {
|
| 598 |
"benchmark": benchmark_display,
|
|
|
|
| 7 |
import logging
|
| 8 |
import re
|
| 9 |
import ast
|
| 10 |
+
from collections import deque
|
| 11 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 12 |
from pathlib import Path
|
| 13 |
from typing import Dict, List, Any, Optional
|
|
|
|
| 574 |
|
| 575 |
selected_entries.sort(key=lambda x: x[0].lower())
|
| 576 |
|
| 577 |
+
rows_by_subtask: List[List[Dict[str, Any]]] = []
|
| 578 |
subtasks_summary: List[Dict[str, Any]] = []
|
| 579 |
for subtask, info in selected_entries:
|
| 580 |
rows = _read_detail_parquet(info["path"], subtask)
|
| 581 |
+
rows_by_subtask.append(rows)
|
| 582 |
|
| 583 |
valid = [r for r in rows if isinstance(r.get("is_correct"), bool)]
|
| 584 |
correct = sum(1 for r in valid if r["is_correct"])
|
|
|
|
| 592 |
"accuracy": accuracy,
|
| 593 |
})
|
| 594 |
|
| 595 |
+
total_rows = sum(len(rows) for rows in rows_by_subtask)
|
| 596 |
+
if max_rows > 0 and total_rows > max_rows:
|
| 597 |
+
queues = [deque(rows) for rows in rows_by_subtask]
|
| 598 |
+
all_rows: List[Dict[str, Any]] = []
|
| 599 |
+
while len(all_rows) < max_rows:
|
| 600 |
+
progressed = False
|
| 601 |
+
for q in queues:
|
| 602 |
+
if not q:
|
| 603 |
+
continue
|
| 604 |
+
all_rows.append(q.popleft())
|
| 605 |
+
progressed = True
|
| 606 |
+
if len(all_rows) >= max_rows:
|
| 607 |
+
break
|
| 608 |
+
if not progressed:
|
| 609 |
+
break
|
| 610 |
+
else:
|
| 611 |
+
all_rows = [row for rows in rows_by_subtask for row in rows]
|
| 612 |
|
| 613 |
return {
|
| 614 |
"benchmark": benchmark_display,
|
frontend/leaderboard.html
CHANGED
|
@@ -777,7 +777,6 @@
|
|
| 777 |
const output = escapeHtml(asUnknown(r.output));
|
| 778 |
const sampleMeta = [
|
| 779 |
r.question_id ? `id: ${escapeHtml(r.question_id)}` : null,
|
| 780 |
-
r.metric_name ? `metric: ${escapeHtml(r.metric_name)}` : null,
|
| 781 |
r.metric !== null && r.metric !== undefined ? `score: ${escapeHtml(r.metric)}` : null,
|
| 782 |
].filter(Boolean).join(" | ");
|
| 783 |
|
|
@@ -808,7 +807,8 @@
|
|
| 808 |
|
| 809 |
// --- MODAL LOGIC ---
|
| 810 |
window.openModelDetails = function (modelName) {
|
| 811 |
-
|
|
|
|
| 812 |
if (!model) return;
|
| 813 |
|
| 814 |
const fullPath = model["Model Name"];
|
|
@@ -817,7 +817,8 @@
|
|
| 817 |
const displayModel = hasOrg ? fullPath.substring(splitIndex + 1) : fullPath;
|
| 818 |
const displayOrg = hasOrg ? fullPath.substring(0, splitIndex) : null;
|
| 819 |
|
| 820 |
-
$('#modalTitle')
|
|
|
|
| 821 |
<div class="flex flex-col">
|
| 822 |
<span>${displayModel}</span>
|
| 823 |
${displayOrg ? `
|
|
@@ -833,14 +834,21 @@
|
|
| 833 |
const revisionForApi = revision === "Unknown" ? "main" : revision;
|
| 834 |
const modelSize = toNumber(model["Model Size"]);
|
| 835 |
const fallbackLikes = model["Hub ❤️"];
|
| 836 |
-
$('#modalRank')
|
| 837 |
-
|
| 838 |
-
|
| 839 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 840 |
// We don't have an ID for downloads yet in the static HTML, so we rely on the injected HTML below
|
| 841 |
-
$('#modalLicense')
|
| 842 |
-
|
| 843 |
-
$('#
|
|
|
|
|
|
|
|
|
|
| 844 |
|
| 845 |
// --- 1. MODIFIED: Added Download Span to Metadata Line ---
|
| 846 |
// I added the separator dot and the Downloads span at the end of this block
|
|
@@ -875,7 +883,8 @@
|
|
| 875 |
.then(response => response.json())
|
| 876 |
.then(data => {
|
| 877 |
if (data.likes !== undefined && data.likes !== null) {
|
| 878 |
-
$('#modalLikes')
|
|
|
|
| 879 |
}
|
| 880 |
// Check if API returns downloads and update
|
| 881 |
if (data.downloads !== undefined) {
|
|
@@ -887,15 +896,19 @@
|
|
| 887 |
console.error('Error fetching stats:', error);
|
| 888 |
const dl = document.getElementById('modalDownloads');
|
| 889 |
if (dl) dl.innerText = "Unknown";
|
| 890 |
-
$('#modalLikes')
|
|
|
|
| 891 |
});
|
| 892 |
|
| 893 |
-
|
|
|
|
|
|
|
| 894 |
|
| 895 |
-
$('#modalLinkDetails')
|
|
|
|
| 896 |
|
| 897 |
const chartContainer = $('#modalChart');
|
| 898 |
-
chartContainer.innerHTML = "";
|
| 899 |
|
| 900 |
EVAL_COLUMNS.forEach(col => {
|
| 901 |
const score = parseFloat(model[col]) || 0;
|
|
@@ -910,7 +923,7 @@
|
|
| 910 |
<div class="w-12 text-sm font-bold text-slate-700 dark:text-slate-200 text-right shrink-0">${score.toFixed(2)}</div>
|
| 911 |
</div>
|
| 912 |
`;
|
| 913 |
-
chartContainer.insertAdjacentHTML('beforeend', barHtml);
|
| 914 |
|
| 915 |
setTimeout(() => {
|
| 916 |
const bar = document.getElementById(`bar-${col.replace(/\s+/g, '')}`);
|
|
@@ -918,12 +931,17 @@
|
|
| 918 |
}, 100);
|
| 919 |
});
|
| 920 |
|
| 921 |
-
$('#modelModal')
|
|
|
|
| 922 |
document.body.style.overflow = 'hidden';
|
| 923 |
if (window.lucide) lucide.createIcons();
|
|
|
|
|
|
|
|
|
|
| 924 |
};
|
| 925 |
window.closeModelDetails = function () {
|
| 926 |
-
$('#modelModal')
|
|
|
|
| 927 |
document.body.style.overflow = '';
|
| 928 |
};
|
| 929 |
|
|
@@ -1073,11 +1091,12 @@
|
|
| 1073 |
const type = (typeIdx > -1 && r.cells[typeIdx]) ? r.cells[typeIdx].data : "";
|
| 1074 |
const tMap = { "base": "🟢", "instruct": "🔶"};
|
| 1075 |
const tIcon = tMap[type] || type;
|
|
|
|
| 1076 |
|
| 1077 |
return gridjs.html(`
|
| 1078 |
<div class="relative w-full h-[38px] flex items-center group pr-6">
|
| 1079 |
${type ? `<span class="mr-2 text-lg select-none" title="${type}">${tIcon}</span>` : ''}
|
| 1080 |
-
<div onclick="window.openModelDetails('${
|
| 1081 |
</div>
|
| 1082 |
`);
|
| 1083 |
};
|
|
|
|
| 777 |
const output = escapeHtml(asUnknown(r.output));
|
| 778 |
const sampleMeta = [
|
| 779 |
r.question_id ? `id: ${escapeHtml(r.question_id)}` : null,
|
|
|
|
| 780 |
r.metric !== null && r.metric !== undefined ? `score: ${escapeHtml(r.metric)}` : null,
|
| 781 |
].filter(Boolean).join(" | ");
|
| 782 |
|
|
|
|
| 807 |
|
| 808 |
// --- MODAL LOGIC ---
|
| 809 |
window.openModelDetails = function (modelName) {
|
| 810 |
+
try {
|
| 811 |
+
const model = lbData.find(r => String(r["Model Name"] || "") === String(modelName || ""));
|
| 812 |
if (!model) return;
|
| 813 |
|
| 814 |
const fullPath = model["Model Name"];
|
|
|
|
| 817 |
const displayModel = hasOrg ? fullPath.substring(splitIndex + 1) : fullPath;
|
| 818 |
const displayOrg = hasOrg ? fullPath.substring(0, splitIndex) : null;
|
| 819 |
|
| 820 |
+
const modalTitleEl = $('#modalTitle');
|
| 821 |
+
if (modalTitleEl) modalTitleEl.innerHTML = `
|
| 822 |
<div class="flex flex-col">
|
| 823 |
<span>${displayModel}</span>
|
| 824 |
${displayOrg ? `
|
|
|
|
| 834 |
const revisionForApi = revision === "Unknown" ? "main" : revision;
|
| 835 |
const modelSize = toNumber(model["Model Size"]);
|
| 836 |
const fallbackLikes = model["Hub ❤️"];
|
| 837 |
+
const modalRankEl = $('#modalRank');
|
| 838 |
+
if (modalRankEl) modalRankEl.innerText = "#" + model["Rank"];
|
| 839 |
+
const modalAvgEl = $('#modalAvg');
|
| 840 |
+
if (modalAvgEl) modalAvgEl.innerText = parseFloat(model["Average"]).toFixed(2);
|
| 841 |
+
const modalSizeEl = $('#modalSize');
|
| 842 |
+
if (modalSizeEl) modalSizeEl.innerText = modelSize === null ? "Unknown" : `${Math.floor(modelSize)}B`;
|
| 843 |
+
const modalLikesEl = $('#modalLikes');
|
| 844 |
+
if (modalLikesEl) modalLikesEl.innerText = prettyIntOrUnknown(fallbackLikes);
|
| 845 |
// We don't have an ID for downloads yet in the static HTML, so we rely on the injected HTML below
|
| 846 |
+
const modalLicenseEl = $('#modalLicense');
|
| 847 |
+
if (modalLicenseEl) modalLicenseEl.innerText = asUnknown(model["License"]);
|
| 848 |
+
const modalPrecisionEl = $('#modalPrecision');
|
| 849 |
+
if (modalPrecisionEl) modalPrecisionEl.innerText = asUnknown(model["Precision"]);
|
| 850 |
+
const modalRevisionEl = $('#modalRevision');
|
| 851 |
+
if (modalRevisionEl) modalRevisionEl.innerText = revision;
|
| 852 |
|
| 853 |
// --- 1. MODIFIED: Added Download Span to Metadata Line ---
|
| 854 |
// I added the separator dot and the Downloads span at the end of this block
|
|
|
|
| 883 |
.then(response => response.json())
|
| 884 |
.then(data => {
|
| 885 |
if (data.likes !== undefined && data.likes !== null) {
|
| 886 |
+
const likesEl = $('#modalLikes');
|
| 887 |
+
if (likesEl) likesEl.innerText = prettyIntOrUnknown(data.likes);
|
| 888 |
}
|
| 889 |
// Check if API returns downloads and update
|
| 890 |
if (data.downloads !== undefined) {
|
|
|
|
| 896 |
console.error('Error fetching stats:', error);
|
| 897 |
const dl = document.getElementById('modalDownloads');
|
| 898 |
if (dl) dl.innerText = "Unknown";
|
| 899 |
+
const likesEl = $('#modalLikes');
|
| 900 |
+
if (likesEl) likesEl.innerText = prettyIntOrUnknown(fallbackLikes);
|
| 901 |
});
|
| 902 |
|
| 903 |
+
const modelPath = encodeURIComponent(String(model["Model Name"] || "")).replace(/%2F/g, '/');
|
| 904 |
+
const hfLink = $('#modalLinkHF');
|
| 905 |
+
if (hfLink) hfLink.href = `https://huggingface.co/${modelPath}`;
|
| 906 |
|
| 907 |
+
const detailsLink = $('#modalLinkDetails');
|
| 908 |
+
if (detailsLink) detailsLink.href = `https://huggingface.co/datasets/qimma/leaderboard-details/tree/main/${modelPath}`;
|
| 909 |
|
| 910 |
const chartContainer = $('#modalChart');
|
| 911 |
+
if (chartContainer) chartContainer.innerHTML = "";
|
| 912 |
|
| 913 |
EVAL_COLUMNS.forEach(col => {
|
| 914 |
const score = parseFloat(model[col]) || 0;
|
|
|
|
| 923 |
<div class="w-12 text-sm font-bold text-slate-700 dark:text-slate-200 text-right shrink-0">${score.toFixed(2)}</div>
|
| 924 |
</div>
|
| 925 |
`;
|
| 926 |
+
if (chartContainer) chartContainer.insertAdjacentHTML('beforeend', barHtml);
|
| 927 |
|
| 928 |
setTimeout(() => {
|
| 929 |
const bar = document.getElementById(`bar-${col.replace(/\s+/g, '')}`);
|
|
|
|
| 931 |
}, 100);
|
| 932 |
});
|
| 933 |
|
| 934 |
+
const modelModalEl = $('#modelModal');
|
| 935 |
+
if (modelModalEl) modelModalEl.classList.remove('hidden');
|
| 936 |
document.body.style.overflow = 'hidden';
|
| 937 |
if (window.lucide) lucide.createIcons();
|
| 938 |
+
} catch (err) {
|
| 939 |
+
console.error("openModelDetails failed", err);
|
| 940 |
+
}
|
| 941 |
};
|
| 942 |
window.closeModelDetails = function () {
|
| 943 |
+
const modelModalEl = $('#modelModal');
|
| 944 |
+
if (modelModalEl) modelModalEl.classList.add('hidden');
|
| 945 |
document.body.style.overflow = '';
|
| 946 |
};
|
| 947 |
|
|
|
|
| 1091 |
const type = (typeIdx > -1 && r.cells[typeIdx]) ? r.cells[typeIdx].data : "";
|
| 1092 |
const tMap = { "base": "🟢", "instruct": "🔶"};
|
| 1093 |
const tIcon = tMap[type] || type;
|
| 1094 |
+
const em = encodeURIComponent(String(c ?? ""));
|
| 1095 |
|
| 1096 |
return gridjs.html(`
|
| 1097 |
<div class="relative w-full h-[38px] flex items-center group pr-6">
|
| 1098 |
${type ? `<span class="mr-2 text-lg select-none" title="${type}">${tIcon}</span>` : ''}
|
| 1099 |
+
<div onclick="window.openModelDetails(decodeURIComponent('${em}'))" class="font-bold text-indigo-600 dark:text-indigo-400 hover:underline cursor-pointer line-clamp-2-custom leading-[1.3] select-text" title="Click for details">${c}</div>
|
| 1100 |
</div>
|
| 1101 |
`);
|
| 1102 |
};
|