Spaces:
Running
Running
| # ui/tab_leaderboard.py | |
| """ | |
| Tab3: Wang's Five Laws Leaderboard | |
| - Ranked by wang_score (= 1 − pseudo-bulk median SSR_QK, standard layers only) | |
| - On Refresh: silently re-computes all model_summary rows (pseudo-bulk migration) | |
| - Filter by modality (default: language) | |
| - Filter by layer_type (default: standard) | |
| """ | |
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| from db.schema import init_db | |
| from db.reader import get_leaderboard | |
| from db.writer import refresh_all_summaries | |
def _format_leaderboard(df: pd.DataFrame) -> pd.DataFrame:
    """Return a display-ready copy of a leaderboard frame.

    Adds ``model_name`` (the part of ``model_id`` after the last ``/``) and
    ``wang_score_pct`` (``wang_score`` times 100, three decimals), renders
    the Pearson/SSR metric columns that are present as six-decimal strings,
    and keeps only the known display columns, in display order. Missing
    values are rendered as the string ``"N/A"``.

    An empty frame is handed back untouched; otherwise the work is done on a
    copy, so the caller's frame is not modified.
    """
    if df.empty:
        return df

    def _short_name(model_id):
        # "org/model" -> "model"; ids without a slash pass through unchanged.
        if "/" not in model_id:
            return model_id
        return model_id.split("/")[-1]

    def _as_percent(score):
        if pd.notna(score):
            return f"{score*100:.3f}"
        return "N/A"

    def _as_fixed6(value):
        if pd.notna(value):
            return f"{value:.6f}"
        return "N/A"

    table = df.copy()
    table["model_name"] = table["model_id"].apply(_short_name)
    table["wang_score_pct"] = table["wang_score"].apply(_as_percent)

    for metric in ("median_pearson_QK", "median_ssr_QK", "mean_ssr_QK"):
        if metric not in table.columns:
            continue
        table[metric] = table[metric].apply(_as_fixed6)

    # Display order; columns absent from the frame are simply skipped.
    ordered = (
        "model_name", "modality", "layer_type",
        "wang_score_pct",
        "median_pearson_QK", "median_ssr_QK", "mean_ssr_QK",
        "median_cosU_QK", "median_cosU_QV", "median_cosV_QK",
        "n_layers", "n_records", "model_id",
    )
    return table[[name for name in ordered if name in table.columns]]
def load_leaderboard(
    modality: str,
    layer_type: str,
) -> tuple[pd.DataFrame, str]:
    """Refresh the stored summaries, then load the leaderboard for the filters.

    Args:
        modality: Modality filter, forwarded unchanged to ``get_leaderboard``.
        layer_type: Layer-type filter. ``"all"`` is replaced by ``"standard"``
            before querying; any other value is forwarded unchanged.

    Returns:
        A ``(table, status)`` pair. ``table`` is the formatted leaderboard,
        or an empty ``DataFrame`` when the query returned no rows. ``status``
        is the human-readable message shown in the status box.
    """
    conn = init_db()

    # Recompute every model summary before reading (pseudo-bulk migration);
    # the value returned by the refresh is only reported in the status line.
    n_refreshed = refresh_all_summaries(conn)

    # NOTE(review): "all" is coerced to "standard" here, presumably because
    # the score is defined on standard layers only -- confirm this is intended.
    effective_layer_type = "standard" if layer_type == "all" else layer_type

    df = get_leaderboard(
        conn,
        modality=modality,
        layer_type=effective_layer_type,
        limit=100,
    )

    if df.empty:
        return pd.DataFrame(), (
            "No data yet. Please analyze at least one model first.\n"
            f"(modality='{modality}', layer_type='{effective_layer_type}')\n\n"
            "暂无数据，请先在「Analyze」Tab 分析至少一个模型。"
        )

    formatted = _format_leaderboard(df)
    status = (
        f"✅ {len(formatted)} entries "
        f"| modality={modality} layer_type={effective_layer_type} "
        f"| summaries refreshed: {n_refreshed}"
    )
    return formatted, status
def build_tab_leaderboard():
    """Build the "Leaderboard" tab and wire its Refresh button.

    Must be called inside an active Gradio Blocks/Tabs context. The tab holds
    an explanatory header, the modality and layer-type filters, a Refresh
    button, a read-only status box, the leaderboard table and a metric
    reference. Clicking Refresh calls ``load_leaderboard`` with the two
    filter values and writes its ``(table, status)`` result into the table
    and the status box. Nothing is loaded until the button is clicked.
    """
    with gr.Tab("🏆 Leaderboard"):
        # The Markdown bodies are indented to match the code; this assumes
        # Gradio dedents Markdown values before rendering -- TODO confirm.
        gr.Markdown(r"""
        ## Wang's Five Laws — Model Leaderboard
        **Wang Score = 1 − median(SSR\_QK)** Higher is better. Theoretical max = 1.
        Computed from `standard` layers only (global/KV-shared layers excluded).
        Metrics use **pseudo-bulk aggregation** (Nature Comms 2021) to avoid GQA pseudoreplication.
        > 王氏评分 = 1 − median(SSR_QK)，越高越好，理论极值=1。
        > 仅基于 standard 层计算。采用 pseudo-bulk 两步聚合避免 GQA 伪重复计数。
        """)

        # Filters and the trigger share one row (equal scale => equal width).
        with gr.Row():
            modality_input = gr.Dropdown(
                label="Modality",
                choices=["language", "vision", "audio", "all"],
                value="language",
                scale=1,
                info="language = text LLM components | 通常选 language",
            )
            layer_type_input = gr.Dropdown(
                label="Layer Type",
                choices=["standard", "global", "all"],
                value="standard",
                scale=1,
                info=(
                    "standard = normal layers | "
                    "global = K=V shared (Gemma global layers)"
                ),
            )
            refresh_btn = gr.Button(
                "🔄 Refresh Leaderboard", variant="primary", scale=1
            )

        status_text = gr.Textbox(
            label="Status",
            value="Click Refresh to load leaderboard.",
            lines=1,
            interactive=False,
        )

        # Header names mirror the columns emitted by the leaderboard formatter.
        leaderboard_table = gr.Dataframe(
            label="Wang Score Leaderboard (sorted by Wang Score ↓)",
            headers=[
                "model_name", "modality", "layer_type",
                "wang_score_pct",
                "median_pearson_QK", "median_ssr_QK", "mean_ssr_QK",
                "median_cosU_QK", "median_cosU_QV", "median_cosV_QK",
                "n_layers", "n_records", "model_id",
            ],
            interactive=False,
            wrap=True,
        )

        gr.Markdown(r"""
        ### Metric Reference | 指标说明
        | Metric | Description | Better |
        |--------|-------------|--------|
        | Wang Score | 1 − median(SSR\_QK)，综合推理能力评分 | ↑ Higher |
        | median\_pearson\_QK | Q/K spectral Pearson correlation (Law 1) | ↑ Higher |
        | median\_ssr\_QK | Q/K normalized spectral mismatch (Law 2) | ↓ Lower |
        | median\_cosU\_QK | Q/K output subspace alignment (Law 4, ≈ random orthogonal) | ≈ 1/√d |
        | median\_cosU\_QV | Q/V output subspace (Law 4, super-orthogonal) | ↓ Lower |
        | median\_cosV\_QK | Q/K input subspace (Law 5, ≈ random orthogonal) | ≈ 1/√D |
        """)

        refresh_btn.click(
            fn=load_leaderboard,
            inputs=[modality_input, layer_type_input],
            outputs=[leaderboard_table, status_text],
        )