Spaces:
Running
Running
openhands openhands committed on
Commit ·
443f738
1
Parent(s): 70749cd
Fix numeric runtime sorting in leaderboard tables
Browse files
Co-authored-by: openhands <openhands@all-hands.dev>
- leaderboard_transformer.py +16 -7
- tests/test_runtime_sorting.py +40 -0
leaderboard_transformer.py
CHANGED
|
@@ -1472,38 +1472,47 @@ def format_score_column(df: pd.DataFrame, score_col_name: str) -> pd.DataFrame:
|
|
| 1472 |
return df.assign(**{score_col_name: df[score_col_name].apply(apply_formatting)})
|
| 1473 |
|
| 1474 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1475 |
def format_runtime_column(df: pd.DataFrame, runtime_col_name: str) -> pd.DataFrame:
    """
    Applies custom formatting to a runtime column based on its corresponding score column.
    - If runtime is not null, formats as time with 's' suffix.
    - If runtime is null but score is not, it becomes "Missing".
    - If both runtime and score are null, it becomes "Not Submitted".
    Args:
        df: The DataFrame to modify. The runtime column is overwritten in place.
        runtime_col_name: The name of the runtime column to format (e.g., "Average Runtime").
    Returns:
        The same DataFrame object, with the runtime column replaced by display strings.
        If the matching score column does not exist, df is returned untouched.
    """
    # The score column name is derived by replacing "Runtime" with "Score".
    score_col_name = runtime_col_name.replace("Runtime", "Score")

    # Without a score column the Missing / Not Submitted distinction cannot be made.
    if score_col_name not in df.columns:
        return df

    status_color = "#ec4899"

    def render_cell(runtime_value, score_value):
        if pd.notna(runtime_value) and isinstance(runtime_value, (int, float)):
            return f"{runtime_value:.0f}s"
        if pd.notna(score_value):
            return f'<span style="color: {status_color};">Missing</span>'
        return f'<span style="color: {status_color};">Not Submitted</span>'

    # Walk the two columns directly instead of df.apply(..., axis=1):
    #  * tolist() hands back plain Python scalars, so integer runtimes pass the
    #    isinstance check above rather than arriving as numpy.int64;
    #  * an empty frame produces an empty list, whereas row-wise apply on an
    #    empty frame may return a DataFrame that cannot be assigned to a column.
    runtime_values = df[runtime_col_name].tolist()
    score_values = df[score_col_name].tolist()
    df[runtime_col_name] = [
        render_cell(runtime_value, score_value)
        for runtime_value, score_value in zip(runtime_values, score_values)
    ]

    return df
|
|
|
|
| 1472 |
return df.assign(**{score_col_name: df[score_col_name].apply(apply_formatting)})
|
| 1473 |
|
| 1474 |
|
| 1475 |
+
def _hidden_runtime_sort_key(runtime_value: float | int | None, score_value: float | int | None) -> str:
|
| 1476 |
+
"""Build a hidden prefix so Gradio's string-based runtime sorting behaves numerically."""
|
| 1477 |
+
if pd.notna(runtime_value) and isinstance(runtime_value, (int, float)):
|
| 1478 |
+
return f"{float(runtime_value):020.6f}"
|
| 1479 |
+
if pd.notna(score_value):
|
| 1480 |
+
return "99999999999999999998"
|
| 1481 |
+
return "99999999999999999999"
|
| 1482 |
+
|
| 1483 |
+
|
| 1484 |
def format_runtime_column(df: pd.DataFrame, runtime_col_name: str) -> pd.DataFrame:
|
| 1485 |
"""
|
| 1486 |
Applies custom formatting to a runtime column based on its corresponding score column.
|
| 1487 |
- If runtime is not null, formats as time with 's' suffix.
|
| 1488 |
- If runtime is null but score is not, it becomes "Missing".
|
| 1489 |
- If both runtime and score are null, it becomes "Not Submitted".
|
| 1490 |
+
- Adds a hidden, zero-padded numeric prefix so Gradio sorts the column numerically.
|
| 1491 |
Args:
|
| 1492 |
df: The DataFrame to modify.
|
| 1493 |
runtime_col_name: The name of the runtime column to format (e.g., "Average Runtime").
|
| 1494 |
Returns:
|
| 1495 |
The DataFrame with the formatted runtime column.
|
| 1496 |
"""
|
|
|
|
| 1497 |
score_col_name = runtime_col_name.replace("Runtime", "Score")
|
| 1498 |
|
|
|
|
| 1499 |
if score_col_name not in df.columns:
|
| 1500 |
+
return df
|
| 1501 |
|
| 1502 |
def apply_formatting_logic(row):
|
| 1503 |
runtime_value = row[runtime_col_name]
|
| 1504 |
score_value = row[score_col_name]
|
| 1505 |
status_color = "#ec4899"
|
| 1506 |
+
sort_key = _hidden_runtime_sort_key(runtime_value, score_value)
|
| 1507 |
+
hidden_sort_prefix = f'<span style="display:none">{sort_key}</span>'
|
| 1508 |
|
| 1509 |
if pd.notna(runtime_value) and isinstance(runtime_value, (int, float)):
|
| 1510 |
+
return f"{hidden_sort_prefix}{runtime_value:.0f}s"
|
| 1511 |
elif pd.notna(score_value):
|
| 1512 |
+
return f'{hidden_sort_prefix}<span style="color: {status_color};">Missing</span>'
|
| 1513 |
else:
|
| 1514 |
+
return f'{hidden_sort_prefix}<span style="color: {status_color};">Not Submitted</span>'
|
| 1515 |
|
|
|
|
| 1516 |
df[runtime_col_name] = df.apply(apply_formatting_logic, axis=1)
|
| 1517 |
|
| 1518 |
return df
|
tests/test_runtime_sorting.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
|
| 3 |
+
from leaderboard_transformer import format_runtime_column
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def test_runtime_strings_sort_numerically_in_ascending_order():
    """Plain string sorting of the formatted cells must follow numeric runtime order.

    Expected ascending order: 372, 410, 1323, then the row with a score but no
    runtime, then the row with neither.
    """
    frame = pd.DataFrame(
        {
            "Average Score": [0.8, 0.8, 0.8, 0.8, None],
            "Average Runtime": [1323.0, 372.0, 410.0, None, None],
        }
    )

    cells = format_runtime_column(frame.copy(), "Average Runtime")["Average Runtime"].tolist()

    # Positions of the input rows in the order they should appear after sorting.
    expected_positions = (1, 2, 0, 3, 4)
    expected_order = [cells[position] for position in expected_positions]

    assert sorted(cells) == expected_order
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def test_runtime_formatting_preserves_visible_labels():
    """The visible part of each cell is unchanged; the sort key is carried in a hidden span."""
    frame = pd.DataFrame(
        {
            "Average Score": [0.8, 0.8, None],
            "Average Runtime": [45.2, None, None],
        }
    )

    result = format_runtime_column(frame.copy(), "Average Runtime")
    with_runtime, score_only, nothing = result["Average Runtime"].tolist()

    assert with_runtime.endswith("45s")
    assert score_only.endswith("Missing</span>")
    assert nothing.endswith("Not Submitted</span>")
    assert 'display:none' in with_runtime
|