Spaces:
Running
Running
| import json | |
| import pandas as pd | |
| from pathlib import Path | |
# Choices accepted by the size-band filter; "All" disables the filter and
# "Unknown" selects entries whose parameter count is missing.
SIZE_BANDS = [
    "All",
    "<10B",
    "10-32B",
    "32-100B",
    ">100B",
    "Unknown",
]

# Choices accepted by the modality filter; "All" disables the filter.
MODALITIES = [
    "All",
    "VL",
    "Omni",
]
def load_leaderboard_from_json(json_path="leaderboard_data.json"):
    """Load the leaderboard entries stored in a JSON file.

    The file is expected to hold a JSON object whose ``"leaderboard"`` key
    contains the entries.

    Args:
        json_path: Path of the JSON file to read (opened as UTF-8).

    Returns:
        The value stored under ``"leaderboard"``. An empty list is returned,
        after printing a short diagnostic, when the file does not exist, is
        not valid JSON, or does not contain a top-level ``"leaderboard"`` key.
    """
    # Keep the try body limited to the statements that can raise the handled
    # exceptions.
    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"JSON file {json_path} not found")
        return []
    except json.JSONDecodeError:
        print(f"Error decoding JSON file {json_path}")
        return []

    # A syntactically valid file with the wrong shape is handled like the
    # other load failures instead of escaping as KeyError/TypeError.
    if not isinstance(data, dict) or 'leaderboard' not in data:
        print(f"JSON file {json_path} has no 'leaderboard' entry")
        return []
    return data['leaderboard']
| def _in_size_band(params_b, band): | |
| if band == "All": | |
| return True | |
| if params_b is None or pd.isna(params_b): | |
| return band == "Unknown" | |
| if band == "<10B": | |
| return params_b < 10 | |
| if band == "10-32B": | |
| return 10 <= params_b < 32 | |
| if band == "32-100B": | |
| return 32 <= params_b < 100 | |
| if band == ">100B": | |
| return params_b >= 100 | |
| if band == "Unknown": | |
| return False | |
| return True | |
def create_leaderboard_df(
    json_path="leaderboard_data.json",
    open_source_only=False,
    size_band="All",
    modality="All",
):
    """Create a display-ready pandas DataFrame from JSON leaderboard data.

    Entries are loaded with ``load_leaderboard_from_json``, optionally
    filtered, sorted by ``acc`` in descending order and converted into a
    table of preformatted (partly HTML) strings.

    Args:
        json_path: Path of the leaderboard JSON file.
        open_source_only: When true, keep only rows whose ``open_source``
            value equals ``True``.
        size_band: Size-band label passed to ``_in_size_band``; ``"All"`` or
            a falsy value disables the filter.
        modality: Exact ``modality`` value to keep; ``"All"`` or a falsy
            value disables the filter.

    Returns:
        A DataFrame with the columns ``Model``, ``Release Date``,
        ``HF Model``, ``Modality``, ``Parameters``, ``Open Source`` and
        ``ACC Score``. An empty DataFrame (no columns) is returned when no
        data could be loaded or no row survives the filters.

    Raises:
        KeyError: If the entries lack one of the columns read here
            (``model``, ``link``, ``release_date``, ``hf``, ``params``,
            ``open_source``, ``acc``).
    """
    leaderboard_data = load_leaderboard_from_json(json_path)
    if not leaderboard_data:
        return pd.DataFrame()

    df = pd.DataFrame(leaderboard_data)

    # Backfill optional columns for older JSON entries
    if 'params_b' not in df.columns:
        df['params_b'] = None
    if 'modality' not in df.columns:
        df['modality'] = "VL"

    # Apply filters
    if open_source_only:
        # Element-wise comparison on the column, not a truthiness test.
        df = df[df['open_source'] == True]
    if modality and modality != "All":
        df = df[df['modality'] == modality]
    if size_band and size_band != "All":
        df = df[df['params_b'].apply(lambda v: _in_size_band(v, size_band))]

    if df.empty:
        return pd.DataFrame()

    # Sort by ACC score (descending) so medal icons reflect the filtered view
    df = df.sort_values('acc', ascending=False).reset_index(drop=True)

    # Glyphs are written as escape sequences so the source stays pure ASCII
    # and cannot be damaged by a wrong file encoding again.
    medals = (
        "\U0001F947",  # 1st place medal
        "\U0001F948",  # 2nd place medal
        "\U0001F949",  # 3rd place medal
    )
    hugging_face = "\U0001F917"
    # NOTE(review): the original yes/no glyphs were lost to mis-encoding (both
    # branches had collapsed to the same character); check mark / ballot X
    # are used here - confirm against the intended design.
    yes_mark = "\u2713"
    no_mark = "\u2717"

    def add_ranking_icon_and_link(index, model_name, paper_link):
        """Return the model name as a link, prefixed by a medal for the top 3."""
        anchor = f'<a href="{paper_link}" target="_blank">{model_name}</a>'
        if index < len(medals):
            return f'{medals[index]} {anchor}'
        return anchor

    display_df = pd.DataFrame({
        'Model': [
            add_ranking_icon_and_link(i, model, link)
            for i, (model, link) in enumerate(zip(df['model'], df['link']))
        ],
        'Release Date': df['release_date'],
        # "-" marks entries without a Hugging Face page; keep it as plain text.
        'HF Model': df['hf'].apply(
            lambda x: f'<a href="{x}" target="_blank">{hugging_face}</a>' if x != "-" else "-"
        ),
        'Modality': df['modality'],
        'Parameters': df['params'],
        'Open Source': df['open_source'].apply(lambda x: yes_mark if x else no_mark),
        'ACC Score': df['acc'].apply(lambda x: f"{x:.1f}"),
    })
    return display_df
def get_leaderboard_stats(json_path="leaderboard_data.json"):
    """Summarise the leaderboard stored at *json_path*.

    Args:
        json_path: Path of the leaderboard JSON file.

    Returns:
        An empty dict when no entries could be loaded. Otherwise a dict with
        ``total_models`` (row count), ``open_source_models`` (sum of the
        ``open_source`` column), ``moe_models`` (rows whose ``moe`` value is
        exactly ``True``) and the mean / max / min of ``acc`` under
        ``avg_acc``, ``max_acc`` and ``min_acc``.
    """
    entries = load_leaderboard_from_json(json_path)
    if not entries:
        return {}

    frame = pd.DataFrame(entries)

    summary = {'total_models': len(frame)}
    summary['open_source_models'] = int(frame['open_source'].sum())

    # Only a literal True counts as MoE; any other value contributes 0.
    moe_flags = frame['moe'].apply(lambda x: 1 if x is True else 0)
    summary['moe_models'] = int(moe_flags.sum())

    acc_scores = frame['acc']
    summary['avg_acc'] = acc_scores.mean()
    summary['max_acc'] = acc_scores.max()
    summary['min_acc'] = acc_scores.min()
    return summary