math-under-llm / ui /tab_leaderboard.py
Alex W.
改动汇总
ba623bd
# ui/tab_leaderboard.py
"""
Tab3: Wang's Five Laws Leaderboard
- Ranked by wang_score (= 1 − pseudo-bulk median SSR_QK, standard layers only)
- On Refresh: silently re-computes all model_summary rows (pseudo-bulk migration)
- Filter by modality (default: language)
- Filter by layer_type (default: standard)
"""
import gradio as gr
import pandas as pd
import numpy as np
from db.schema import init_db
from db.reader import get_leaderboard
from db.writer import refresh_all_summaries
def _format_leaderboard(df: pd.DataFrame) -> pd.DataFrame:
    """Turn raw leaderboard rows into the table shown in the UI.

    Adds a short ``model_name`` (the text after the last ``/`` of
    ``model_id``) and a ``wang_score_pct`` string (``wang_score`` scaled to a
    percentage with three decimals), renders the three QK metric columns as
    six-decimal strings, and keeps only the known display columns in a fixed
    order. Missing values are rendered as ``"N/A"``.

    Args:
        df: Leaderboard rows. Any expected column may be absent; absent
            columns are skipped rather than raising ``KeyError``.

    Returns:
        A new frame restricted to the display columns that are present.
        An empty input is returned as-is. The input frame is never mutated.
    """
    if df.empty:
        return df

    missing = "N/A"

    def short_name(model_id):
        # A null or non-string id (e.g. NaN from a NULL row) cannot be split;
        # show the same placeholder used for every other missing value.
        if not isinstance(model_id, str):
            return missing
        return model_id.rsplit("/", 1)[-1]

    def as_percent(score):
        return f"{score * 100:.3f}" if pd.notna(score) else missing

    def as_fixed(value):
        return f"{value:.6f}" if pd.notna(value) else missing

    table = df.copy()

    # Guard the derived columns the same way the metric columns are guarded,
    # so a partial frame degrades to fewer columns instead of crashing.
    if "model_id" in table.columns:
        table["model_name"] = table["model_id"].apply(short_name)
    if "wang_score" in table.columns:
        table["wang_score_pct"] = table["wang_score"].apply(as_percent)

    for column in ("median_pearson_QK", "median_ssr_QK", "mean_ssr_QK"):
        if column in table.columns:
            table[column] = table[column].apply(as_fixed)

    display_cols = [
        "model_name", "modality", "layer_type",
        "wang_score_pct",
        "median_pearson_QK", "median_ssr_QK", "mean_ssr_QK",
        "median_cosU_QK", "median_cosU_QV", "median_cosV_QK",
        "n_layers", "n_records", "model_id",
    ]
    return table[[c for c in display_cols if c in table.columns]]
def load_leaderboard(
    modality: str,
    layer_type: str,
) -> tuple[pd.DataFrame, str]:
    """Refresh stored summaries and load the leaderboard for the UI.

    Args:
        modality: Modality filter, passed to ``get_leaderboard`` unchanged.
        layer_type: Layer-type filter. ``"all"`` is replaced by
            ``"standard"`` before querying; any other value is passed
            through.

    Returns:
        ``(table, status)``: the formatted leaderboard and a one-line status
        message. When the query returns no rows, ``table`` is an empty
        ``DataFrame`` and ``status`` is a bilingual "no data" hint.
    """
    conn = init_db()

    # Recompute every model summary before reading (pseudo-bulk migration),
    # so the ranking is built from freshly aggregated rows.
    n_refreshed = refresh_all_summaries(conn)

    # "all" is never queried as such; it falls back to "standard".
    # NOTE(review): presumably because the score is defined on standard
    # layers only (see module docstring) - confirm this is intended.
    lt = "standard" if layer_type == "all" else layer_type

    df = get_leaderboard(conn, modality=modality, layer_type=lt, limit=100)
    if df.empty:
        return pd.DataFrame(), (
            "No data yet. Please analyze at least one model first.\n"
            f"(modality='{modality}', layer_type='{lt}')\n\n"
            "暂无数据,请先在「Analyze」Tab 分析至少一个模型。"
        )

    formatted = _format_leaderboard(df)
    status = (
        f"✅ {len(formatted)} entries "
        f"| modality={modality} layer_type={lt} "
        f"| summaries refreshed: {n_refreshed}"
    )
    return formatted, status
def build_tab_leaderboard():
    """Build the "Leaderboard" tab and wire its Refresh button.

    Creates the explanatory Markdown, the modality and layer-type dropdowns,
    the Refresh button, a read-only status box and the results table, then
    binds the button to ``load_leaderboard`` so a click fills the table and
    the status box. Returns nothing.

    NOTE(review): presumably must be called inside an active ``gr.Blocks``
    context - confirm against the caller.
    """
    with gr.Tab("🏆 Leaderboard"):
        # Markdown bodies stay flush-left so the rendered text does not
        # depend on how the component treats leading whitespace.
        gr.Markdown(r"""
## Wang's Five Laws — Model Leaderboard
**Wang Score = 1 − median(SSR\_QK)** Higher is better. Theoretical max = 1.
Computed from `standard` layers only (global/KV-shared layers excluded).
Metrics use **pseudo-bulk aggregation** (Nature Comms 2021) to avoid GQA pseudoreplication.
> 王氏评分 = 1 − median(SSR_QK),越高越好,理论极值=1。
> 仅基于 standard 层计算。采用 pseudo-bulk 两步聚合避免 GQA 伪重复计数。
""")

        with gr.Row():
            modality_input = gr.Dropdown(
                label="Modality",
                choices=["language", "vision", "audio", "all"],
                value="language",
                scale=1,
                info="language = text LLM components | 通常选 language",
            )
            layer_type_input = gr.Dropdown(
                label="Layer Type",
                choices=["standard", "global", "all"],
                value="standard",
                scale=1,
                info=(
                    "standard = normal layers | "
                    "global = K=V shared (Gemma global layers)"
                ),
            )
            refresh_btn = gr.Button(
                "🔄 Refresh Leaderboard", variant="primary", scale=1
            )

        status_text = gr.Textbox(
            label="Status",
            value="Click Refresh to load leaderboard.",
            lines=1,
            interactive=False,
        )

        # Headers mirror the display columns produced by _format_leaderboard.
        leaderboard_table = gr.Dataframe(
            label="Wang Score Leaderboard (sorted by Wang Score ↓)",
            headers=[
                "model_name", "modality", "layer_type",
                "wang_score_pct",
                "median_pearson_QK", "median_ssr_QK", "mean_ssr_QK",
                "median_cosU_QK", "median_cosU_QV", "median_cosV_QK",
                "n_layers", "n_records", "model_id",
            ],
            interactive=False,
            wrap=True,
        )

        gr.Markdown(r"""
### Metric Reference | 指标说明
| Metric | Description | Better |
|--------|-------------|--------|
| Wang Score | 1 − median(SSR\_QK),综合推理能力评分 | ↑ Higher |
| median\_pearson\_QK | Q/K spectral Pearson correlation (Law 1) | ↑ Higher |
| median\_ssr\_QK | Q/K normalized spectral mismatch (Law 2) | ↓ Lower |
| median\_cosU\_QK | Q/K output subspace alignment (Law 4, ≈ random orthogonal) | ≈ 1/√d |
| median\_cosU\_QV | Q/V output subspace (Law 4, super-orthogonal) | ↓ Lower |
| median\_cosV\_QK | Q/K input subspace (Law 5, ≈ random orthogonal) | ≈ 1/√D |
""")

        # A click re-runs the query with the current dropdown values and
        # writes the result into the table and the status box.
        refresh_btn.click(
            fn=load_leaderboard,
            inputs=[modality_input, layer_type_input],
            outputs=[leaderboard_table, status_text],
        )