import json
import pandas as pd
from pathlib import Path

# Filter choices offered for the parameter-count band and the modality.
SIZE_BANDS = ["All", "<10B", "10-32B", "32-100B", ">100B", "Unknown"]
MODALITIES = ["All", "VL", "Omni"]


def load_leaderboard_from_json(json_path="leaderboard_data.json"):
    """Load the list of leaderboard entries from a JSON file.

    The file is expected to hold a JSON object with a ``leaderboard`` key.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        The value stored under ``leaderboard``, or an empty list when the
        file is missing, is not valid JSON, or has no ``leaderboard`` key.
        A short message is printed in each failure case.
    """
    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"JSON file {json_path} not found")
        return []
    except json.JSONDecodeError:
        print(f"Error decoding JSON file {json_path}")
        return []

    # A well-formed JSON file without the expected key is treated like the
    # other load failures instead of raising KeyError/TypeError at callers.
    if not isinstance(data, dict) or 'leaderboard' not in data:
        print(f"JSON file {json_path} has no 'leaderboard' key")
        return []
    return data['leaderboard']


def _in_size_band(params_b, band):
    """Return whether a parameter count (in billions) falls in *band*.

    Args:
        params_b: Parameter count in billions, or ``None``/NaN when unknown.
        band: One of the labels in ``SIZE_BANDS``.

    Returns:
        ``True`` when the value belongs to the band. A missing value only
        matches ``"Unknown"`` (and ``"All"``). An unrecognised band label
        matches every known value.
    """
    if band == "All":
        return True
    if params_b is None or pd.isna(params_b):
        return band == "Unknown"
    if band == "<10B":
        return params_b < 10
    if band == "10-32B":
        return 10 <= params_b < 32
    if band == "32-100B":
        return 32 <= params_b < 100
    if band == ">100B":
        return params_b >= 100
    if band == "Unknown":
        # The value is known here, so it cannot be in the "Unknown" band.
        return False
    return True


def create_leaderboard_df(
    json_path="leaderboard_data.json",
    open_source_only=False,
    size_band="All",
    modality="All",
):
    """Create a display DataFrame from JSON leaderboard data, with filters.

    Args:
        json_path: Path of the leaderboard JSON file.
        open_source_only: Keep only rows whose ``open_source`` equals True.
        size_band: A ``SIZE_BANDS`` label; ``"All"`` or a falsy value
            disables the filter.
        modality: A modality label; ``"All"`` or a falsy value disables
            the filter.

    Returns:
        A DataFrame with the columns ``Model``, ``Release Date``,
        ``HF Model``, ``Modality``, ``Parameters``, ``Open Source`` and
        ``ACC Score``, sorted by ``acc`` descending. An empty, column-less
        DataFrame is returned when no data is loaded or no row survives
        the filters.
    """
    leaderboard_data = load_leaderboard_from_json(json_path)

    if not leaderboard_data:
        return pd.DataFrame()

    df = pd.DataFrame(leaderboard_data)

    # Backfill optional columns for older JSON entries
    if 'params_b' not in df.columns:
        df['params_b'] = None
    if 'modality' not in df.columns:
        df['modality'] = "VL"

    # Apply filters
    if open_source_only:
        # Element-wise equality (not truthiness), so missing values drop out.
        df = df[df['open_source'].eq(True)]
    if modality and modality != "All":
        df = df[df['modality'] == modality]
    if size_band and size_band != "All":
        in_band = df['params_b'].apply(lambda v: _in_size_band(v, size_band))
        # Force a boolean mask so an already-empty frame is still filtered
        # by rows rather than interpreted as a column selection.
        df = df[in_band.astype(bool)]

    if df.empty:
        return pd.DataFrame()

    # Sort by ACC score (descending) so medal icons reflect the filtered view
    df = df.sort_values('acc', ascending=False).reset_index(drop=True)

    def add_ranking_icon_and_link(index, model_name, paper_link):
        """Prefix the top three model names with a medal icon."""
        # NOTE(review): paper_link is accepted but not used in the label;
        # confirm whether a hyperlink was intended here.
        if index == 0:
            return f'🥇 {model_name}'
        elif index == 1:
            return f'🥈 {model_name}'
        elif index == 2:
            return f'🥉 {model_name}'
        else:
            return f'{model_name}'

    display_df = pd.DataFrame({
        'Model': [
            add_ranking_icon_and_link(i, model, link)
            for i, (model, link) in enumerate(zip(df['model'], df['link']))
        ],
        'Release Date': df['release_date'],
        # "-" marks an entry without a Hugging Face value.
        'HF Model': df['hf'].apply(lambda x: '🤗' if x != "-" else "-"),
        'Modality': df['modality'],
        'Parameters': df['params'],
        'Open Source': df['open_source'].apply(lambda x: '✓' if x else '✗'),
        'ACC Score': df['acc'].apply(lambda x: f"{x:.1f}"),
    })

    return display_df


def get_leaderboard_stats(json_path="leaderboard_data.json"):
    """Get summary statistics about the leaderboard.

    Args:
        json_path: Path of the leaderboard JSON file.

    Returns:
        A dict with ``total_models``, ``open_source_models``,
        ``moe_models``, ``avg_acc``, ``max_acc`` and ``min_acc``, or an
        empty dict when no data could be loaded.
    """
    leaderboard_data = load_leaderboard_from_json(json_path)
    if not leaderboard_data:
        return {}

    df = pd.DataFrame(leaderboard_data)

    # 'moe' may be absent from older JSON entries; count only literal True.
    if 'moe' in df.columns:
        moe_models = int(
            df['moe'].apply(lambda x: 1 if x is True else 0).sum()
        )
    else:
        moe_models = 0

    stats = {
        'total_models': len(df),
        'open_source_models': int(df['open_source'].sum()),
        'moe_models': moe_models,
        'avg_acc': df['acc'].mean(),
        'max_acc': df['acc'].max(),
        'min_acc': df['acc'].min(),
    }

    return stats