openhands openhands committed on
Commit
443f738
·
1 Parent(s): 70749cd

Fix numeric runtime sorting in leaderboard tables

Browse files

Co-authored-by: openhands <openhands@all-hands.dev>

leaderboard_transformer.py CHANGED
@@ -1472,38 +1472,47 @@ def format_score_column(df: pd.DataFrame, score_col_name: str) -> pd.DataFrame:
1472
  return df.assign(**{score_col_name: df[score_col_name].apply(apply_formatting)})
1473
 
1474
 
 
 
 
 
 
 
 
 
 
1475
def format_runtime_column(df: pd.DataFrame, runtime_col_name: str) -> pd.DataFrame:
    """
    Format a runtime column for display, using its matching score column for context.

    - If the runtime is a non-null real number, it is rendered as whole seconds
      with an 's' suffix (e.g. ``45.2`` -> ``"45s"``).
    - If the runtime is null but the score is not, the cell becomes "Missing".
    - If both runtime and score are null, the cell becomes "Not Submitted".

    The score column name is derived by replacing "Runtime" with "Score" in
    ``runtime_col_name``. If that column does not exist, ``df`` is returned
    untouched.

    Args:
        df: The DataFrame to modify. The runtime column is overwritten in place.
        runtime_col_name: The name of the runtime column to format
            (e.g., "Average Runtime").

    Returns:
        The same DataFrame object, with the runtime column replaced by strings.
    """
    # Local import keeps this function self-contained; `numbers.Real` also
    # matches NumPy scalars such as np.int64, which a plain (int, float)
    # check rejects and would wrongly label as "Missing".
    import numbers

    score_col_name = runtime_col_name.replace("Runtime", "Score")
    if score_col_name not in df.columns:
        return df

    status_color = "#ec4899"

    def format_cell(runtime_value, score_value) -> str:
        if isinstance(runtime_value, numbers.Real) and pd.notna(runtime_value):
            return f"{float(runtime_value):.0f}s"
        if pd.notna(score_value):
            # Score exists, but runtime is missing.
            return f'<span style="color: {status_color};">Missing</span>'
        # Neither score nor runtime exists.
        return f'<span style="color: {status_color};">Not Submitted</span>'

    # Pairing the two columns directly avoids building a Series per row and
    # also behaves sensibly on an empty frame.
    df[runtime_col_name] = [
        format_cell(runtime_value, score_value)
        for runtime_value, score_value in zip(df[runtime_col_name], df[score_col_name])
    ]
    return df
 
1472
  return df.assign(**{score_col_name: df[score_col_name].apply(apply_formatting)})
1473
 
1474
 
1475
+ def _hidden_runtime_sort_key(runtime_value: float | int | None, score_value: float | int | None) -> str:
1476
+ """Build a hidden prefix so Gradio's string-based runtime sorting behaves numerically."""
1477
+ if pd.notna(runtime_value) and isinstance(runtime_value, (int, float)):
1478
+ return f"{float(runtime_value):020.6f}"
1479
+ if pd.notna(score_value):
1480
+ return "99999999999999999998"
1481
+ return "99999999999999999999"
1482
+
1483
+
1484
  def format_runtime_column(df: pd.DataFrame, runtime_col_name: str) -> pd.DataFrame:
1485
  """
1486
  Applies custom formatting to a runtime column based on its corresponding score column.
1487
  - If runtime is not null, formats as time with 's' suffix.
1488
  - If runtime is null but score is not, it becomes "Missing".
1489
  - If both runtime and score are null, it becomes "Not Submitted".
1490
+ - Adds a hidden, zero-padded numeric prefix so Gradio sorts the column numerically.
1491
  Args:
1492
  df: The DataFrame to modify.
1493
  runtime_col_name: The name of the runtime column to format (e.g., "Average Runtime").
1494
  Returns:
1495
  The DataFrame with the formatted runtime column.
1496
  """
 
1497
  score_col_name = runtime_col_name.replace("Runtime", "Score")
1498
 
 
1499
  if score_col_name not in df.columns:
1500
+ return df
1501
 
1502
  def apply_formatting_logic(row):
1503
  runtime_value = row[runtime_col_name]
1504
  score_value = row[score_col_name]
1505
  status_color = "#ec4899"
1506
+ sort_key = _hidden_runtime_sort_key(runtime_value, score_value)
1507
+ hidden_sort_prefix = f'<span style="display:none">{sort_key}</span>'
1508
 
1509
  if pd.notna(runtime_value) and isinstance(runtime_value, (int, float)):
1510
+ return f"{hidden_sort_prefix}{runtime_value:.0f}s"
1511
  elif pd.notna(score_value):
1512
+ return f'{hidden_sort_prefix}<span style="color: {status_color};">Missing</span>'
1513
  else:
1514
+ return f'{hidden_sort_prefix}<span style="color: {status_color};">Not Submitted</span>'
1515
 
 
1516
  df[runtime_col_name] = df.apply(apply_formatting_logic, axis=1)
1517
 
1518
  return df
tests/test_runtime_sorting.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
+ from leaderboard_transformer import format_runtime_column
4
+
5
+
6
def test_runtime_strings_sort_numerically_in_ascending_order():
    """A plain string sort of the formatted cells must follow numeric runtime order.

    Real runtimes come first (ascending), then the row with a score but no
    runtime, then the row with neither.
    """
    frame = pd.DataFrame(
        {
            "Average Score": [0.8, 0.8, 0.8, 0.8, None],
            "Average Runtime": [1323.0, 372.0, 410.0, None, None],
        }
    )

    cells = format_runtime_column(frame.copy(), "Average Runtime")["Average Runtime"].tolist()

    # Row positions listed in the order an ascending sort should produce.
    expected_order = [cells[position] for position in (1, 2, 0, 3, 4)]
    assert sorted(cells) == expected_order
24
+
25
+
26
def test_runtime_formatting_preserves_visible_labels():
    """The hidden sort prefix must not alter the text shown at the end of each cell."""
    frame = pd.DataFrame(
        {
            "Average Score": [0.8, 0.8, None],
            "Average Runtime": [45.2, None, None],
        }
    )

    result = format_runtime_column(frame.copy(), "Average Runtime")
    numeric_cell, missing_cell, not_submitted_cell = result["Average Runtime"].tolist()

    assert numeric_cell.endswith("45s")
    assert missing_cell.endswith("Missing</span>")
    assert not_submitted_cell.endswith("Not Submitted</span>")
    # The sort key is carried in a span that is hidden from view.
    assert 'display:none' in numeric_cell