/**
 * Leaderboard page script for `#freshstack-table`.
 *
 * Loads `leaderboard_data.json`, renders the sortable/filterable table,
 * draws one Plotly scatter plot per entry of AVERAGE_METRIC_PLOTS, and
 * provides CSV/JSON export.
 */

// `globalThis` is `window` in a browser; using it keeps the pure helpers in
// this file loadable outside a DOM environment.
globalThis.HELP_IMPROVE_VIDEOJS = false;

/** Rows from `leaderboard_data.json` (`leaderboardData` key); null until loaded. */
let fullLeaderboardData = null;

/** Location of the leaderboard data, shared by the table loader and JSON export. */
const LEADERBOARD_DATA_URL = './leaderboard_data.json';

/** Dataset keys, in table column order. */
const TABLE_DATASETS = ['average', 'langchain', 'yolo', 'laravel', 'angular', 'godot'];

/** Metrics shown for every dataset, in table column order. */
const TABLE_METRICS = [
  { key: 'alpha_ndcg_10', label: 'α@10' },
  { key: 'coverage_20', label: 'C@20' },
  { key: 'recall_50', label: 'R@50' }
];

/** Columns that precede the metric columns: rank, model name, size, date. */
const LEADING_COLUMN_COUNT = 4;

/** A model counts as "new" for this many days after its date. */
const NEW_MODEL_WINDOW_DAYS = 90;

/** Delay before releasing a download's object URL, so the click can start the download. */
const OBJECT_URL_REVOKE_DELAY_MS = 1000;

/** Average (5 domains) metric vs parameters; dataset key is always `average`. */
const AVERAGE_METRIC_PLOTS = [
  {
    plotId: 'plot-avg-r50',
    metricKey: 'recall_50',
    yTitle: 'R@50 (Avg. 5)',
    hoverMetric: 'Recall@50',
    yMin: 0.15,
    yMax: 0.755
  },
  {
    plotId: 'plot-avg-alpha10',
    metricKey: 'alpha_ndcg_10',
    yTitle: 'α@10 (Avg. 5)',
    hoverMetric: 'α@10',
    yMin: 0.1,
    yMax: 0.541
  },
  {
    plotId: 'plot-avg-c20',
    metricKey: 'coverage_20',
    yTitle: 'C@20 (Avg. 5)',
    hoverMetric: 'C@20',
    yMin: 0.25,
    yMax: 0.868
  }
];

const RECALL_TYPE_ORDER = ['upper_baseline', 'open_source', 'proprietary'];

const RECALL_TYPE_LABELS = {
  upper_baseline: 'Oracle (Stack Overflow nuggets)',
  open_source: 'Open source',
  proprietary: 'Proprietary'
};

/** Plotly marker symbol per model type. */
const TYPE_SYMBOLS = {
  upper_baseline: 'star',
  open_source: 'circle',
  proprietary: 'diamond'
};

/** Fixed colors so a family keeps its color regardless of the active filter. */
const PINNED_FAMILY_COLORS = {
  'Stella': '#1f77b4',
  'Harrier OSS': '#ff7f0e',
  'Voyage': '#009688',
  'Jina': '#d62728',
  'Qwen3': '#9467bd',
  'IBM Granite': '#8c564b',
  'Arctic Embed': '#e377c2',
  'Perplexity Embed': '#17becf',
  'GTE': '#bcbd22',
  'BGE': '#7f7f7f',
  'E5': '#393b79',
  'OpenAI Embedding': '#637939',
  'Nomic Embed': '#6a1b9a',
  'Cohere Embed': '#843c39',
  'EmbeddingGemma': '#e91e63',
  'Tarka': '#43a047',
  'Jasper': '#756bb1',
  'Fusion': '#e6550d',
  'BM25': '#969696',
  'Other': '#9e9e9e'
};

/** Colors handed out, in order, to families that have no pinned color. */
const FAMILY_COLOR_PALETTE = [
  '#3949ab', '#00897b', '#e53935', '#8e24aa', '#f4511e', '#1e88e5',
  '#43a047', '#6d4c41', '#00acc1', '#7cb342', '#5e35b1', '#c2185b',
  '#ff8f00', '#546e7a', '#2e7d32', '#ad1457', '#039be5', '#ef6c00'
];

/**
 * Substring rules mapping a lower-cased model name to a family label.
 * First match wins, so order matters.
 */
const FAMILY_RULES = [
  // Must come first: the fusion row is named "fusion (bm25, bge, e5, voyage)"
  // and would otherwise be claimed by the 'voyage' rule.
  { key: 'fusion', label: 'Fusion' },
  { key: 'stella', label: 'Stella' },
  { key: 'harrier', label: 'Harrier OSS' },
  { key: 'voyage', label: 'Voyage' },
  { key: 'jina', label: 'Jina' },
  { key: 'qwen3', label: 'Qwen3' },
  { key: 'granite', label: 'IBM Granite' },
  { key: 'arctic embed', label: 'Arctic Embed' },
  { key: 'perplexity embed', label: 'Perplexity Embed' },
  { key: 'gte', label: 'GTE' },
  { key: 'bge', label: 'BGE' },
  { key: 'e5', label: 'E5' },
  { key: 'openai text-embedding', label: 'OpenAI Embedding' },
  // NOTE(review): 'jasper' maps to 'Stella' although PINNED_FAMILY_COLORS has a
  // 'Jasper' entry — confirm whether the two are meant to share a family.
  { key: 'jasper', label: 'Stella' },
  { key: 'coderankembed', label: 'Nomic Embed' },
  { key: 'nomic embed', label: 'Nomic Embed' },
  { key: 'cohere embed', label: 'Cohere Embed' },
  { key: 'embeddinggemma', label: 'EmbeddingGemma' },
  { key: 'tarka', label: 'Tarka' },
  { key: 'bm25', label: 'BM25' }
];

/** Horizontal reference lines drawn on every plot; `modelName` is matched lower-cased and exactly. */
const BASELINE_LINES = [
  {
    name: 'BM25',
    modelName: 'bm25',
    line: { color: 'rgba(97,97,97,0.55)', width: 1.1, dash: 'dash' }
  },
  {
    name: 'Fusion',
    modelName: 'fusion (bm25, bge, e5, voyage)',
    line: { color: 'rgba(106,27,154,0.55)', width: 1.1, dash: 'dot' }
  }
];

/**
 * Escape a value for safe interpolation into HTML text or a quoted attribute.
 * @param {*} value - Any value; null/undefined become the empty string.
 * @returns {string} The HTML-escaped string.
 */
function escapeHtml(value) {
  return String(value ?? '')
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Map a model name to its family label using FAMILY_RULES.
 * A leading "oracle:" prefix is ignored.
 * @param {*} rawName - Model name (any value; falsy is treated as empty).
 * @returns {string} The family label, or 'Other' when no rule matches.
 */
function inferModelFamily(rawName) {
  const name = String(rawName || '').toLowerCase().replace(/^oracle:\s*/, '');
  const rule = FAMILY_RULES.find(({ key }) => name.includes(key));
  return rule ? rule.label : 'Other';
}

/**
 * Assign a color to every family present in the given rows.
 * Pinned families get their pinned color; others take palette colors in
 * alphabetical family order, cycling when the palette runs out.
 * @param {Array<object>} dataToRender - Leaderboard rows.
 * @returns {Object<string, string>} Family label -> CSS color.
 */
function buildFamilyColorMap(dataToRender) {
  const families = [...new Set(dataToRender.map(row => inferModelFamily(row.info?.name)))]
    .sort((a, b) => a.localeCompare(b));

  const colorMap = {};
  let fallbackIndex = 0;
  families.forEach(family => {
    if (PINNED_FAMILY_COLORS[family]) {
      colorMap[family] = PINNED_FAMILY_COLORS[family];
    } else {
      colorMap[family] = FAMILY_COLOR_PALETTE[fallbackIndex % FAMILY_COLOR_PALETTE.length];
      fallbackIndex += 1;
    }
  });
  return colorMap;
}

/**
 * Format a parameter count given in billions for display.
 * @param {number|null|undefined} sizeInBillions
 * @returns {string} '-' when missing/NaN, e.g. '335M' below one billion, otherwise e.g. '7.000B'.
 */
function formatParameterSize(sizeInBillions) {
  if (sizeInBillions === undefined || sizeInBillions === null || Number.isNaN(sizeInBillions)) {
    return '-';
  }
  if (sizeInBillions < 1) {
    return `${(sizeInBillions * 1000).toFixed(0)}M`;
  }
  return `${sizeInBillions.toFixed(3)}B`;
}

/**
 * Look up one `average` metric for the first row whose lower-cased name satisfies `matcher`.
 * @param {Array<object>|null} data - Leaderboard rows.
 * @param {(lowerCaseName: string) => boolean} matcher
 * @param {string} metricKey - Key inside `row.datasets.average`.
 * @returns {number|null} The metric as a number, or null when missing or not numeric.
 */
function getModelAverageMetric(data, matcher, metricKey) {
  const row = (data || []).find(item => matcher(String(item?.info?.name || '').toLowerCase()));
  const val = row?.datasets?.average?.[metricKey];
  if (val === undefined || val === null || Number.isNaN(Number(val))) return null;
  return Number(val);
}

/**
 * Parse a size string such as '7B', '335M' or '500 k' into billions.
 * @param {*} sizeStr
 * @returns {number|null} Size in billions, or null when the value is missing,
 *   '-' or not in `<number><B|M|K>` form.
 */
function parseSizeToBillions(sizeStr) {
  if (sizeStr === undefined || sizeStr === null) return null;
  const raw = String(sizeStr).trim();
  if (raw === '' || raw === '-') return null;

  const m = raw.match(/^([\d.]+)\s*([BMK])$/i);
  if (!m) return null;

  const num = parseFloat(m[1]);
  if (Number.isNaN(num)) return null;

  switch (m[2].toUpperCase()) {
    case 'B':
      return num;
    case 'M':
      return num / 1000;
    case 'K':
      return num / 1e6;
    default:
      return null;
  }
}

/**
 * Show or hide the `#recall-plots-empty` notice, if the element exists.
 * @param {boolean} hasAnyParams - Whether any rendered row has a parseable size.
 */
function updateRecallEmptyMessage(hasAnyParams) {
  const emptyMsg = document.getElementById('recall-plots-empty');
  if (!emptyMsg) return;

  if (hasAnyParams) {
    emptyMsg.textContent = '';
    emptyMsg.classList.add('is-hidden');
  } else {
    emptyMsg.textContent =
      'No models in the current filter have a numeric parameter count; adjust filters or refer to the table for models without a reported size.';
    emptyMsg.classList.remove('is-hidden');
  }
}

/**
 * Build one marker trace per model family for a single metric.
 * Rows without a parseable positive size or a numeric metric value are skipped.
 * @param {Array<object>} dataToRender - Leaderboard rows.
 * @param {string} metricKey - Key inside `row.datasets.average`.
 * @param {string} hoverMetric - Metric label shown in the hover box.
 * @param {Object<string, string>} familyColors - Family label -> color.
 * @returns {Array<object>} Plotly scatter traces, sorted by family name.
 */
function buildFamilyTraces(dataToRender, metricKey, hoverMetric, familyColors) {
  const grouped = {};

  dataToRender.forEach(row => {
    const typeKey = row.info?.type;
    const x = parseSizeToBillions(row.info?.size);
    const y = row.datasets?.average?.[metricKey];
    // The x axis is logarithmic, so non-positive sizes cannot be placed on it.
    if (x === null || x <= 0) return;
    if (y === undefined || y === null || Number.isNaN(Number(y))) return;

    const family = inferModelFamily(row.info?.name);
    if (!grouped[family]) {
      grouped[family] = { x: [], y: [], symbols: [], hovertext: [] };
    }
    const group = grouped[family];

    const modelName = row.info?.name ?? '';
    const typeLabel = RECALL_TYPE_LABELS[typeKey] || typeKey || '';
    const paramLabel = formatParameterSize(x);

    group.x.push(x);
    group.y.push(Number(y));
    // Plotly hover text breaks lines on <br>, not on newline characters.
    group.hovertext.push(
      [modelName, `Family: ${family}`, `Type: ${typeLabel}`, `Parameters: ${paramLabel}`].join('<br>')
    );
    group.symbols.push(TYPE_SYMBOLS[typeKey] || 'circle');
  });

  return Object.keys(grouped)
    .sort((a, b) => a.localeCompare(b))
    .map(family => ({
      type: 'scatter',
      mode: 'markers',
      name: family,
      x: grouped[family].x,
      y: grouped[family].y,
      hovertext: grouped[family].hovertext,
      marker: {
        color: familyColors[family] || '#666',
        symbol: grouped[family].symbols,
        size: 12,
        opacity: 0.95,
        line: { width: 1, color: '#fff' }
      },
      // <extra></extra> hides the secondary box; the family is already in the hover text.
      hovertemplate: `%{hovertext}<br>${hoverMetric}: %{y:.3f}<extra></extra>`
    }));
}

/**
 * Build a horizontal reference-line trace spanning the plotted x range.
 * @param {{name: string, line: object}} baseline - Entry of BASELINE_LINES.
 * @param {number} score - y value of the line.
 * @param {[number, number]} xRange - [min, max] x of the plotted points.
 * @param {string} hoverMetric - Metric label shown in the hover box.
 * @returns {object} Plotly line trace.
 */
function buildBaselineTrace(baseline, score, xRange, hoverMetric) {
  return {
    type: 'scatter',
    mode: 'lines',
    name: baseline.name,
    x: xRange,
    y: [score, score],
    line: baseline.line,
    hovertemplate: `${baseline.name}<br>${hoverMetric}: ${score.toFixed(3)}<extra></extra>`
  };
}

/**
 * Layout used when a plot has no points: hidden axes and a centered message.
 * @param {boolean} hasAnyParams - Selects which message is shown.
 * @returns {object} Plotly layout.
 */
function buildEmptyPlotLayout(hasAnyParams) {
  return {
    annotations: [
      {
        text: hasAnyParams ? 'No data for this view.' : 'No models with numeric parameter counts.',
        xref: 'paper',
        yref: 'paper',
        x: 0.5,
        y: 0.5,
        showarrow: false,
        font: { size: 14, color: '#666' }
      }
    ],
    xaxis: { visible: false },
    yaxis: { visible: false },
    margin: { t: 20, r: 20, b: 20, l: 20 }
  };
}

/**
 * Draw into a plot element, updating in place when it already holds a plot.
 * @param {HTMLElement} el
 * @param {Array<object>} traces
 * @param {object} layout
 * @param {object} config
 */
function drawPlot(el, traces, layout, config) {
  if (el.data && el.layout) {
    Plotly.react(el, traces, layout, config);
  } else {
    Plotly.newPlot(el, traces, layout, config);
  }
}

/**
 * Render every plot in AVERAGE_METRIC_PLOTS (metric vs. model parameters) for
 * the given rows. Does nothing when Plotly is not loaded; plots whose
 * container element is missing are skipped.
 * @param {Array<object>} dataToRender - Leaderboard rows currently shown.
 */
function renderRecallPlots(dataToRender) {
  if (typeof Plotly === 'undefined') return;

  const hasAnyParams = dataToRender.some(row => parseSizeToBillions(row.info?.size) !== null);
  updateRecallEmptyMessage(hasAnyParams);

  const plotConfig = { responsive: true, displayModeBar: true, displaylogo: false };
  const familyColors = buildFamilyColorMap(dataToRender);
  // Baselines come from the unfiltered data so the lines survive any filter.
  const baselineData = fullLeaderboardData || dataToRender;

  AVERAGE_METRIC_PLOTS.forEach(({ plotId, metricKey, yTitle, hoverMetric, yMin, yMax }) => {
    const el = document.getElementById(plotId);
    if (!el) return;

    const traces = buildFamilyTraces(dataToRender, metricKey, hoverMetric, familyColors);
    const xValues = traces.flatMap(trace => trace.x);
    if (xValues.length === 0) {
      drawPlot(el, [], buildEmptyPlotLayout(hasAnyParams), plotConfig);
      return;
    }

    const xRange = [Math.min(...xValues), Math.max(...xValues)];
    BASELINE_LINES.forEach(baseline => {
      const score = getModelAverageMetric(
        baselineData,
        name => name === baseline.modelName,
        metricKey
      );
      if (score !== null) {
        traces.push(buildBaselineTrace(baseline, score, xRange, hoverMetric));
      }
    });

    const layout = {
      margin: { t: 28, r: 12, b: 88, l: 56 },
      xaxis: {
        title: { text: 'Model Parameters (Billions)', standoff: 18 },
        type: 'log',
        showgrid: true,
        zeroline: false,
        tickfont: { size: 11 }
      },
      yaxis: {
        title: { text: yTitle },
        range: [yMin, yMax],
        tickformat: '.2f',
        showgrid: true
      },
      legend: { orientation: 'h', yanchor: 'top', y: -0.28, xanchor: 'center', x: 0.5 },
      hovermode: 'closest',
      dragmode: 'zoom'
    };
    drawPlot(el, traces, layout, plotConfig);
  });
}

/** Ask Plotly to re-fit every plot to its container; no-op when Plotly is not loaded. */
function resizeRecallPlots() {
  if (typeof Plotly === 'undefined') return;
  AVERAGE_METRIC_PLOTS.forEach(({ plotId }) => {
    const el = document.getElementById(plotId);
    if (!el) return;
    try {
      Plotly.Plots.resize(el);
    } catch (_) {
      /* no plot yet */
    }
  });
}

// Carousel/slider widgets. Guarded like Plotly above so a missing third-party
// script does not stop the table from loading.
if (typeof $ !== 'undefined') {
  $(document).ready(function () {
    const options = {
      slidesToScroll: 1,
      slidesToShow: 1,
      loop: true,
      infinite: true,
      autoplay: true,
      autoplaySpeed: 5000
    };
    if (typeof bulmaCarousel !== 'undefined') {
      bulmaCarousel.attach('.carousel', options);
    }
    if (typeof bulmaSlider !== 'undefined') {
      bulmaSlider.attach();
    }
  });
}

if (typeof document !== 'undefined') {
  document.addEventListener('DOMContentLoaded', function () {
    loadTableData();
    setupEventListeners();
    window.addEventListener('resize', function () {
      adjustNameColumnWidth();
      resizeRecallPlots();
    });
  });
}

/**
 * Whether a model's date falls within the last NEW_MODEL_WINDOW_DAYS days.
 * @param {string} dateStr - Anything `new Date()` accepts.
 * @returns {boolean} False for unparseable dates.
 */
function isNewModel(dateStr) {
  const modelDate = new Date(dateStr);
  const now = new Date();
  const daysDiff = (now - modelDate) / (1000 * 60 * 60 * 24);
  return daysDiff <= NEW_MODEL_WINDOW_DAYS;
}

/**
 * Rows of the loaded leaderboard whose type is ticked in the `.type-filter` checkboxes.
 * @returns {Array<object>} Possibly empty; empty when no data is loaded yet.
 */
function getFilteredLeaderboardData() {
  const checkedTypes = new Set(
    Array.from(document.querySelectorAll('.type-filter:checked'), cb => cb.value)
  );
  return (fullLeaderboardData || []).filter(row => checkedTypes.has(row.info?.type));
}

/**
 * Replace the table body with a single full-width error row.
 * @param {string} message - Error detail appended to the fixed explanation.
 */
function showTableError(message) {
  const tbody = document.querySelector('#freshstack-table tbody');
  if (!tbody) return;

  const td = document.createElement('td');
  td.colSpan = LEADING_COLUMN_COUNT + TABLE_DATASETS.length * TABLE_METRICS.length;
  // Text nodes, not innerHTML: the message may contain arbitrary characters.
  td.append(
    `Error loading data: ${message}`,
    document.createElement('br'),
    "Please ensure you're accessing this page through a web server (http://localhost:8000) and not directly from the file system."
  );

  const tr = document.createElement('tr');
  tr.appendChild(td);
  tbody.innerHTML = '';
  tbody.appendChild(tr);
}

/**
 * Fetch the leaderboard JSON, remember it in `fullLeaderboardData` and render
 * the rows matching the current type filter. Failures are logged and shown in
 * the table body.
 */
function loadTableData() {
  fetch(LEADERBOARD_DATA_URL)
    .then(response => {
      if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
      }
      return response.json();
    })
    .then(data => {
      if (!Array.isArray(data?.leaderboardData)) {
        throw new Error('leaderboardData is missing or not an array');
      }
      fullLeaderboardData = data.leaderboardData;
      renderTableData(getFilteredLeaderboardData());
    })
    .catch(error => {
      console.error('Error loading table data:', error);
      showTableError(error.message);
    });
}

/**
 * Build the HTML for one table row: rank, name (linked when a link is given,
 * with a NEW badge for recent models), size, date, then one cell per
 * dataset/metric.
 * @param {object} row - Leaderboard row.
 * @param {number} index - Position of `row` in the rendered data.
 * @param {Object<string, Object<string, number[]>>} scoresByDataset - Ranks from prepareScoresForStyling, per dataset.
 * @returns {string} Inner HTML for the row's `<tr>`.
 */
function buildRowHtml(row, index, scoresByDataset) {
  const nameText = escapeHtml(row.info.name);
  const badge = isNewModel(row.info.date)
    ? ' <span class="tag is-warning new-badge">NEW</span>'
    : '';
  const link = row.info.link?.trim();
  const nameCell = link
    ? `<a href="${escapeHtml(link)}" target="_blank" rel="noopener noreferrer">${nameText}</a>${badge}`
    : `${nameText}${badge}`;

  let datasetCells = '';
  TABLE_DATASETS.forEach(dataset => {
    TABLE_METRICS.forEach(metric => {
      const val = row.datasets?.[dataset]?.[metric.key] ?? '-';
      const rank = scoresByDataset[dataset]?.[metric.key]?.[index] ?? -1;
      // The `<dataset>-details` class is what toggleDetails/getCellValue look for.
      datasetCells += `<td class="${dataset}-details">${applyStyle(val, rank)}</td>`;
    });
  });

  // `.rank-cell` starts empty; updateRanks() fills it after sorting.
  return [
    '<td class="rank-cell"></td>',
    `<td>${nameCell}</td>`,
    `<td>${escapeHtml(row.info.size ?? '-')}</td>`,
    `<td>${escapeHtml(row.info.date ?? '-')}</td>`,
    datasetCells
  ].join('');
}

/**
 * Rebuild the table body from the given rows, apply the default sort and
 * redraw the plots for the same rows.
 * @param {Array<object>} dataToRender - Leaderboard rows to show.
 */
function renderTableData(dataToRender) {
  const tbody = document.querySelector('#freshstack-table tbody');
  if (tbody) {
    tbody.innerHTML = '';

    const scoresByDataset = {};
    TABLE_DATASETS.forEach(dataset => {
      scoresByDataset[dataset] = prepareScoresForStyling(dataToRender, dataset);
    });

    dataToRender.forEach((row, index) => {
      const tr = document.createElement('tr');
      if (row.info.type) {
        tr.classList.add(row.info.type);
      }
      tr.innerHTML = buildRowHtml(row, index, scoresByDataset);
      tbody.appendChild(tr);
    });

    setTimeout(adjustNameColumnWidth, 0);
    initializeSorting();
  }
  renderRecallPlots(dataToRender);
}

/**
 * Wire up the reset cell, type-filter checkboxes, sortable headers and the
 * two download buttons. Controls that are not on the page are skipped.
 */
function setupEventListeners() {
  document.querySelector('.reset-cell')?.addEventListener('click', resetTable);

  document.querySelectorAll('.type-filter').forEach(cb => {
    cb.addEventListener('change', applyTypeFilter);
  });

  document
    .querySelectorAll('#freshstack-table thead tr:last-child th.sortable')
    .forEach(header => {
      header.addEventListener('click', function () {
        sortTable(this);
      });
    });

  document.getElementById('download-csv')?.addEventListener('click', () => {
    exportTableToCSV();
  });
  document.getElementById('download-json')?.addEventListener('click', () => {
    exportTableToJSON();
  });
}

/** Re-render the table and plots for the currently ticked model types. */
function applyTypeFilter() {
  if (!fullLeaderboardData) return;
  renderTableData(getFilteredLeaderboardData());
}

/**
 * Toggle visibility of one dataset's detail cells and hide every other
 * dataset's detail cells.
 * @param {string} section - Dataset key, e.g. 'average'.
 */
function toggleDetails(section) {
  TABLE_DATASETS.forEach(sec => {
    const detailCells = document.querySelectorAll(`.${sec}-details`);
    const headerCell = document.querySelector(`.${sec}-details-cell`);

    if (sec === section) {
      detailCells.forEach(cell => cell.classList.toggle('hidden'));
    } else {
      detailCells.forEach(cell => cell.classList.add('hidden'));
    }
    // NOTE(review): the group header always spans 3 columns, even when its
    // detail cells are hidden — confirm that no collapsed width is intended.
    headerCell?.setAttribute('colspan', '3');
  });
  setTimeout(adjustNameColumnWidth, 0);
}

/**
 * Restore the default view: all detail columns visible, default type filter
 * (Oracle unchecked) and default sort.
 */
function resetTable() {
  TABLE_DATASETS.forEach(section => {
    document
      .querySelectorAll(`.${section}-details`)
      .forEach(cell => cell.classList.remove('hidden'));
    document.querySelector(`.${section}-details-cell`)?.setAttribute('colspan', '3');
  });

  // Reset checkboxes to default (Oracle unchecked)
  document.querySelectorAll('.type-filter').forEach(cb => {
    cb.checked = cb.value !== 'upper_baseline';
  });

  // Nothing to render until the data has loaded; the loader renders with the
  // checkbox state set above.
  if (!fullLeaderboardData) return;

  // Re-rendering also re-applies the default sort (see renderTableData).
  renderTableData(getFilteredLeaderboardData());
}

/**
 * Sort the table body by the column of the given header cell and update the
 * header's `asc`/`desc` class and the rank column.
 *
 * Direction is descending on the first click or when the header is currently
 * `asc`, ascending when it is currently `desc`. Cells showing '-' sort last
 * when descending and first when ascending.
 * @param {HTMLElement} header - Header cell; `data-sort` selects 'number', 'date' or text comparison.
 * @param {boolean} [forceDescending=false] - Always sort descending.
 * @param {boolean} [maintainOrder=false] - Only update header state and ranks, keep the row order.
 */
function sortTable(header, forceDescending = false, maintainOrder = false) {
  const table = document.getElementById('freshstack-table');
  const tbody = table.querySelector('tbody');
  const rows = Array.from(tbody.querySelectorAll('tr'));
  const headers = Array.from(header.parentNode.children);
  const columnIndex = headers.indexOf(header);
  const sortType = header.dataset.sort;

  const isUnsorted = !header.classList.contains('asc') && !header.classList.contains('desc');
  const isDescending = forceDescending || isUnsorted || header.classList.contains('asc');

  if (!maintainOrder) {
    rows.sort((a, b) => {
      const aValue = getCellValue(a, columnIndex);
      const bValue = getCellValue(b, columnIndex);

      if (aValue === '-' && bValue !== '-') return isDescending ? 1 : -1;
      if (bValue === '-' && aValue !== '-') return isDescending ? -1 : 1;

      let ascending;
      if (sortType === 'number') {
        ascending = parseFloat(aValue) - parseFloat(bValue);
      } else if (sortType === 'date') {
        ascending = new Date(aValue) - new Date(bValue);
      } else {
        ascending = aValue.localeCompare(bValue);
      }
      return isDescending ? -ascending : ascending;
    });
  }

  headers.forEach(th => th.classList.remove('asc', 'desc'));
  header.classList.add(isDescending ? 'desc' : 'asc');

  rows.forEach(row => tbody.appendChild(row));
  updateRanks();
  setTimeout(adjustNameColumnWidth, 0);
}

/**
 * Text of the cell at `index` in a row. When that cell is hidden, the first
 * visible cell of the same dataset group is used instead.
 * @param {HTMLTableRowElement} row
 * @param {number} index - Child index within the row.
 * @returns {string} Trimmed cell text, or '' when no such cell exists.
 */
function getCellValue(row, index) {
  const cells = Array.from(row.children);
  let cell = cells[index];

  if (cell && cell.classList.contains('hidden')) {
    const hiddenCell = cell;
    const dataset = TABLE_DATASETS.find(name => hiddenCell.classList.contains(`${name}-details`));
    if (dataset) {
      cell = cells.find(
        c => c.classList.contains(`${dataset}-details`) && !c.classList.contains('hidden')
      );
    }
  }
  return cell ? cell.textContent.trim() : '';
}

/**
 * Header used for the default sort: the average-group 'R@50' header, else the
 * first visible numeric header in the last header row.
 * @returns {Element|null}
 */
function getDefaultSortHeader() {
  const headers = document.querySelectorAll('#freshstack-table thead tr:last-child th.sortable');
  for (const header of headers) {
    const text = header.textContent.trim();
    if (header.classList.contains('average-details') && text === 'R@50') {
      return header;
    }
  }
  return document.querySelector(
    '#freshstack-table thead tr:last-child th[data-sort="number"]:not(.hidden)'
  );
}

/** Apply the default sort (descending) if a default header exists. */
function initializeSorting() {
  const headerToSort = getDefaultSortHeader();
  if (headerToSort) {
    sortTable(headerToSort, true, false);
  }
}

/** Number the `.rank-cell` of each body row 1..n in current row order. */
function updateRanks() {
  const tbody = document.querySelector('#freshstack-table tbody');
  if (!tbody) return;
  Array.from(tbody.querySelectorAll('tr')).forEach((row, index) => {
    const cell = row.querySelector('.rank-cell');
    if (cell) {
      cell.textContent = index + 1;
    }
  });
}

function adjustNameColumnWidth() {
  // Column widths are controlled by CSS; no JS override needed.
}

/**
 * Rank every row per metric within one dataset, highest value first.
 * Tied values share a rank and the next distinct value takes its position in
 * the sorted list (0, 0, 2, ...). Rows with a missing, '-' or non-numeric
 * value get rank -1.
 * @param {Array<object>} data - Leaderboard rows.
 * @param {string} section - Dataset key inside `row.datasets`.
 * @returns {Object<string, number[]>} Metric key -> rank per row index.
 */
function prepareScoresForStyling(data, section) {
  const scores = {};

  TABLE_METRICS.forEach(({ key: field }) => {
    const valuesWithIndex = [];
    data.forEach((row, idx) => {
      const val = row.datasets?.[section]?.[field];
      if (val === undefined || val === null || val === '-') return;
      const value = parseFloat(val);
      // A NaN would make the comparator below inconsistent.
      if (Number.isNaN(value)) return;
      valuesWithIndex.push({ index: idx, value });
    });
    valuesWithIndex.sort((a, b) => b.value - a.value);

    const ranks = Array(data.length).fill(-1);
    let currentRank = 0;
    valuesWithIndex.forEach((entry, i) => {
      if (i > 0 && entry.value !== valuesWithIndex[i - 1].value) {
        currentRank = i;
      }
      ranks[entry.index] = currentRank;
    });
    scores[field] = ranks;
  });

  return scores;
}

/**
 * Mark up a metric value for display: bold for rank 0, underlined for rank 1.
 * String values are HTML-escaped.
 * @param {*} value - Metric value, or '-'/null/undefined when missing.
 * @param {number} rank - Rank from prepareScoresForStyling.
 * @returns {string|number} '-' when missing, an HTML string for ranks 0 and 1,
 *   otherwise the (escaped) value itself.
 */
function applyStyle(value, rank) {
  if (value === undefined || value === null || value === '-') return '-';
  const safe = typeof value === 'string' ? escapeHtml(value) : value;
  if (rank === 0) return `<b>${safe}</b>`;
  if (rank === 1) return `<u>${safe}</u>`;
  return safe;
}

/**
 * Quote a cell's text as a CSV field: line breaks become spaces and embedded
 * double quotes are doubled.
 * @param {string} text
 * @returns {string} The field wrapped in double quotes.
 */
function toCsvField(text) {
  const singleLine = String(text).trim().replace(/[\n\r]+/g, ' ');
  return `"${singleLine.replace(/"/g, '""')}"`;
}

/**
 * Start a browser download of `blob` under `filename`.
 * @param {Blob} blob
 * @param {string} filename
 */
function triggerDownload(blob, filename) {
  const url = URL.createObjectURL(blob);
  const link = document.createElement('a');
  link.href = url;
  link.setAttribute('download', filename);
  document.body.appendChild(link);
  link.click();
  document.body.removeChild(link);
  // Release the blob once the click has had a chance to start the download.
  setTimeout(() => URL.revokeObjectURL(url), OBJECT_URL_REVOKE_DELAY_MS);
}

/**
 * Download the table as currently shown (row order and filter) as CSV, with
 * two header rows: dataset names, then metric labels.
 * @param {string} [filename='leaderboard.csv']
 */
function exportTableToCSV(filename = 'leaderboard.csv') {
  const table = document.getElementById('freshstack-table');
  if (!table) return;

  const metricLabels = TABLE_METRICS.map(metric => metric.label);
  const csv = [];

  const headerRow1 = ['Rank', 'Model Name', 'Size', 'Date'];
  TABLE_DATASETS.forEach(dataset => {
    headerRow1.push(dataset.toUpperCase(), '', '');
  });
  csv.push(headerRow1.join(','));

  const headerRow2 = ['-', '-', '-', '-'];
  TABLE_DATASETS.forEach(() => {
    headerRow2.push(...metricLabels);
  });
  csv.push(headerRow2.join(','));

  table.querySelectorAll('tbody tr').forEach(row => {
    const fields = Array.from(row.querySelectorAll('td'), cell => toCsvField(cell.textContent));
    csv.push(fields.join(','));
  });

  const blob = new Blob([csv.join('\n')], { type: 'text/csv;charset=utf-8;' });
  triggerDownload(blob, filename);
}

/**
 * Download the complete leaderboard JSON file, pretty-printed. The file is
 * fetched again, so the download is not affected by the table filter.
 * @param {string} [filename='leaderboard.json']
 */
function exportTableToJSON(filename = 'leaderboard.json') {
  fetch(LEADERBOARD_DATA_URL)
    .then(response => {
      if (!response.ok) throw new Error(`HTTP error! status: ${response.status}`);
      return response.json();
    })
    .then(data => {
      const blob = new Blob([JSON.stringify(data, null, 2)], { type: 'application/json' });
      triggerDownload(blob, filename);
    })
    .catch(error => {
      console.error('Error exporting JSON:', error);
      alert('Failed to export JSON. See console for details.');
    });
}