Probe deploy
Browse files- README.md +12 -7
- __pycache__/app.cpython-310.pyc +0 -0
- __pycache__/app.cpython-311.pyc +0 -0
- __pycache__/constants.cpython-310.pyc +0 -0
- app.py +114 -0
- constants.py +57 -0
- requirements.txt +2 -0
README.md
CHANGED
|
@@ -1,12 +1,17 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version:
|
| 8 |
app_file: app.py
|
| 9 |
-
pinned:
|
|
|
|
|
|
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: AVBench Leaderboard
|
| 3 |
+
emoji: "🥇"
|
| 4 |
+
colorFrom: green
|
| 5 |
+
colorTo: indigo
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 5.43.1
|
| 8 |
app_file: app.py
|
| 9 |
+
pinned: true
|
| 10 |
+
license: apache-2.0
|
| 11 |
+
short_description: AVBench leaderboard table (normal/hard splits)
|
| 12 |
+
python_version: "3.10"
|
| 13 |
---
|
| 14 |
|
| 15 |
+
# AVBench Leaderboard
|
| 16 |
+
|
| 17 |
+
This Space displays the fixed benchmark table for Normal and Hard splits.
|
__pycache__/app.cpython-310.pyc
ADDED
|
Binary file (2.25 kB). View file
|
|
|
__pycache__/app.cpython-311.pyc
ADDED
|
Binary file (5.39 kB). View file
|
|
|
__pycache__/constants.cpython-310.pyc
ADDED
|
Binary file (1.8 kB). View file
|
|
|
app.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
import pandas as pd

from constants import (
    AVBENCH_INTRODUCTION,
    HARD_ROWS,
    METRIC_COLUMNS,
    METRIC_WEIGHTS,
    NORMAL_ROWS,
)


# Column order shown in the leaderboard table: medal + model name, then the
# raw metric columns from constants, then the locally computed "Overall".
DISPLAY_COLUMNS = ["Medal", "Model"] + METRIC_COLUMNS + ["Overall"]
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def compute_normalized_overall(df, columns=None, weights=None):
    """Return a copy of ``df`` with an ``"Overall"`` column added.

    ``Overall`` is the weighted mean of min-max normalized metric columns,
    rounded to 4 decimal places. The input frame is not modified.

    Args:
        df: DataFrame with one row per model and one column per metric.
        columns: Metric column names to aggregate. Defaults to
            ``METRIC_COLUMNS`` (backward compatible with the original
            zero-argument call).
        weights: Mapping of column name -> weight. Defaults to
            ``METRIC_WEIGHTS``.

    Returns:
        A copy of ``df`` with the ``"Overall"`` column appended.
    """
    if columns is None:
        columns = METRIC_COLUMNS
    if weights is None:
        weights = METRIC_WEIGHTS

    norm_df = df.copy()
    total_weight = sum(weights[c] for c in columns)

    # Accumulate into a zero Series rather than the scalar 0.0: with the
    # scalar, a frame where every metric column is constant (e.g. a single
    # row) left weighted_sum a plain float and crashed on float.round(4).
    weighted_sum = pd.Series(0.0, index=norm_df.index)
    for c in columns:
        col_min = norm_df[c].min()
        col_max = norm_df[c].max()
        if col_max > col_min:
            col_norm = (norm_df[c] - col_min) / (col_max - col_min)
        else:
            # A constant metric adds the same amount to every row, so it
            # cannot change the ranking; contribute 0 uniformly.
            col_norm = 0.0
        weighted_sum = weighted_sum + col_norm * weights[c]

    norm_df["Overall"] = (weighted_sum / total_weight).round(4)
    return norm_df
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def add_medals(df):
    """Return a copy of ``df`` with a ``"Medal"`` column.

    The first three rows (assumed already sorted best-first) receive
    🥇 / 🥈 / 🥉; all remaining rows get an empty string. The input
    frame is left untouched and the index is reset.
    """
    ranked = df.copy().reset_index(drop=True)
    podium = ["🥇", "🥈", "🥉"]
    ranked["Medal"] = [
        podium[pos] if pos < len(podium) else "" for pos in range(len(ranked))
    ]
    return ranked
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def build_df(rows, split_name):
    """Build one split's leaderboard frame from raw ``rows``.

    Each row is ``[model_name, *metric_values]`` in ``METRIC_COLUMNS``
    order. The returned frame carries the normalized ``"Overall"`` score
    and a ``"Split"`` tag column.
    """
    header = ["Model"] + METRIC_COLUMNS
    frame = compute_normalized_overall(pd.DataFrame(rows, columns=header))
    frame["Split"] = split_name
    return frame
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def refresh_table(split_name, sort_metric):
    """Return the leaderboard view for ``split_name``.

    Rows are sorted descending by ``sort_metric`` ("Overall" or any metric
    column), medals are awarded to the top three, and a leading "Split"
    column is shown only in the combined "All" view. Any unrecognized
    split name falls through to the combined view (without the Split
    column), matching the original dispatch.
    """
    if split_name == "Normal":
        view = normal_df.copy()
    elif split_name == "Hard":
        view = hard_df.copy()
    else:
        view = pd.concat([normal_df, hard_df], ignore_index=True)

    # sort_metric is already a valid column name ("Overall" included), so
    # it can be used directly as the sort key.
    view = view.sort_values(by=sort_metric, ascending=False).reset_index(drop=True)
    view = add_medals(view)

    shown = ["Split"] + DISPLAY_COLUMNS if split_name == "All" else DISPLAY_COLUMNS
    return view[shown]
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
# Fixed benchmark tables, built once at import time; refresh_table only
# re-sorts/filters these, it never recomputes scores.
normal_df = build_df(NORMAL_ROWS, "Normal")
hard_df = build_df(HARD_ROWS, "Hard")

with gr.Blocks(title="AVBench Leaderboard") as demo:
    gr.Markdown(AVBENCH_INTRODUCTION)

    with gr.Row():
        # Which split(s) to display.
        split_selector = gr.Dropdown(
            choices=["All", "Normal", "Hard"],
            value="All",
            label="Split",
        )
        # Column used for descending sort.
        sort_selector = gr.Dropdown(
            choices=["Overall"] + METRIC_COLUMNS,
            value="Overall",
            label="Sort By",
        )

    # Initial view mirrors the default dropdown values ("All", "Overall").
    leaderboard = gr.Dataframe(
        value=refresh_table("All", "Overall"),
        interactive=False,
        wrap=True,
        label="Leaderboard",
    )

    with gr.Accordion("Metric Groups", open=False):
        gr.Markdown(
            "- Cross-Modal Alignment & Sync: AV, AT, VT, SyncNet\n"
            "- Unimodal Generation Quality: SC, DF-Arena, NISQA, Audiobox, DOVER++, Aesthetic\n"
            "- Overall: min-max normalize each metric first, then weighted sum (current default: equal weights)."
        )

    # Both dropdowns re-render the table with the full current selection.
    split_selector.change(
        refresh_table,
        inputs=[split_selector, sort_selector],
        outputs=leaderboard,
    )
    sort_selector.change(
        refresh_table,
        inputs=[split_selector, sort_selector],
        outputs=leaderboard,
    )


if __name__ == "__main__":
    demo.launch()
|
constants.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Markdown shown at the top of the Space, above the leaderboard table.
AVBENCH_INTRODUCTION = """# AVBench Leaderboard

Which T2AV model performs better on AVBench?

This leaderboard reports two splits:
- Normal Split
- Hard Split

Metrics are grouped into:
- Cross-Modal Alignment & Sync: AV, AT, VT, SyncNet
- Unimodal Generation Quality: SC, DF-Arena, NISQA, Audiobox, DOVER++, Aesthetic

All metrics are higher-is-better.
"""

# Metric column order; must match the per-row value order in NORMAL_ROWS
# and HARD_ROWS below.
METRIC_COLUMNS = [
    "AV",
    "AT",
    "VT",
    "SyncNet",
    "SC",
    "DF-Arena",
    "NISQA",
    "Audiobox",
    "DOVER++",
    "Aesthetic",
]

# Equal weight by default, can be adjusted later.
METRIC_WEIGHTS = {
    "AV": 1.0,
    "AT": 1.0,
    "VT": 1.0,
    "SyncNet": 1.0,
    "SC": 1.0,
    "DF-Arena": 1.0,
    "NISQA": 1.0,
    "Audiobox": 1.0,
    "DOVER++": 1.0,
    "Aesthetic": 1.0,
}

# Fixed benchmark results: each row is [model_name, *metric_values] in
# METRIC_COLUMNS order.
# NOTE(review): the Overall normalization treats every metric as
# higher-is-better (as AVBENCH_INTRODUCTION states) — confirm SyncNet
# here is indeed a higher-is-better score and not an offset/distance.
NORMAL_ROWS = [
    ["Sora 2", 0.8713, 0.5844, 0.7599, 4.9057, 87.8391, 0.4328, 2.3784, 3.1759, 60.0125, 4.0704],
    ["Veo 3 Fast", 0.6924, 0.5708, 0.7235, 6.5943, 77.4950, 0.3043, 2.8191, 3.5877, 69.2275, 4.9967],
    ["Wan 2.6", 0.8207, 0.5826, 0.7556, 4.5016, 91.5568, 0.0441, 3.0289, 3.9271, 71.6473, 4.7790],
    ["Kling 2.6", 0.7626, 0.5603, 0.7501, 8.1027, 68.7844, 0.1665, 3.3141, 3.8082, 65.6786, 5.4885],
    ["Seedance 1.5 Pro", 0.6536, 0.5764, 0.7363, 5.0146, 84.9268, 0.1602, 3.6411, 4.1686, 71.7205, 4.7373],
]

HARD_ROWS = [
    ["Sora 2", 0.9320, 0.5573, 0.7190, 3.7932, 76.7905, 0.5498, 2.0564, 3.1339, 58.1538, 4.0434],
    ["Veo 3 Fast", 0.7766, 0.5130, 0.6943, 3.4535, 70.3144, 0.3827, 2.3321, 3.6113, 67.0833, 5.1438],
    ["Wan 2.6", 0.8780, 0.5517, 0.7482, 3.0488, 84.4512, 0.0498, 3.0726, 4.0924, 71.5229, 4.7721],
    ["Kling 2.6", 0.8813, 0.4970, 0.7105, 3.9844, 69.0691, 0.1469, 3.2425, 3.8912, 62.9994, 5.5033],
    ["Seedance 1.5 Pro", 0.7409, 0.5525, 0.7398, 3.3239, 80.8029, 0.2059, 3.4093, 4.1618, 69.4430, 4.7707],
]
|
requirements.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio==5.43.1
|
| 2 |
+
pandas>=2.0.0
|