immortalindeed committed on
Commit
bdb64b6
·
1 Parent(s): 9bb611a

Fix UI leaderboard fields and commit uv.lock

Browse files
Files changed (3) hide show
  1. .gitignore +0 -1
  2. server/benchmark_store.py +12 -5
  3. uv.lock +0 -0
.gitignore CHANGED
@@ -13,4 +13,3 @@ test_output.txt
13
  unnecessary/
14
  results/
15
  .pytest_cache/
16
- uv.lock
 
13
  unnecessary/
14
  results/
15
  .pytest_cache/
 
server/benchmark_store.py CHANGED
@@ -39,10 +39,10 @@ def append_result(model: str, model_id: str, scores: Dict[str, float]) -> Dict:
39
  """Add a new benchmark result and persist to disk. Returns the saved entry."""
40
  avg = round(sum(scores.values()) / max(len(scores), 1), 4)
41
  entry = {
42
- 'model': model,
43
  'model_id': model_id,
44
  'scores': scores,
45
- 'avg': avg,
46
  'type': 'full_run',
47
  'timestamp': datetime.utcnow().isoformat(),
48
  }
@@ -55,6 +55,11 @@ def append_result(model: str, model_id: str, scores: Dict[str, float]) -> Dict:
55
  def get_all() -> List[Dict]:
56
  """Return all benchmark results, newest first."""
57
  results = _load()
 
 
 
 
 
58
  return sorted(results, key=lambda x: x.get('timestamp', ''), reverse=True)
59
 
60
 
@@ -63,7 +68,9 @@ def get_leaderboard() -> List[Dict]:
63
  results = _load()
64
  best: Dict[str, Dict] = {}
65
  for r in results:
66
- mid = r.get('model_id', r.get('model', 'unknown'))
67
- if mid not in best or r.get('avg', 0) > best[mid].get('avg', 0):
 
 
68
  best[mid] = r
69
- return sorted(best.values(), key=lambda x: x.get('avg', 0), reverse=True)
 
39
  """Add a new benchmark result and persist to disk. Returns the saved entry."""
40
  avg = round(sum(scores.values()) / max(len(scores), 1), 4)
41
  entry = {
42
+ 'model_name': model,
43
  'model_id': model_id,
44
  'scores': scores,
45
+ 'average': avg,
46
  'type': 'full_run',
47
  'timestamp': datetime.utcnow().isoformat(),
48
  }
 
55
  def get_all() -> List[Dict]:
56
  """Return all benchmark results, newest first."""
57
  results = _load()
58
+ for r in results:
59
+ if 'average' not in r and 'avg' in r:
60
+ r['average'] = r['avg']
61
+ if 'model_name' not in r and 'model' in r:
62
+ r['model_name'] = r['model']
63
  return sorted(results, key=lambda x: x.get('timestamp', ''), reverse=True)
64
 
65
 
 
68
  results = _load()
69
  best: Dict[str, Dict] = {}
70
  for r in results:
71
+ mid = r.get('model_id', r.get('model_name', r.get('model', 'unknown')))
72
+ val = r.get('average', r.get('avg', 0))
73
+ best_val = best[mid].get('average', best[mid].get('avg', 0)) if mid in best else -1
74
+ if mid not in best or val > best_val:
75
  best[mid] = r
76
+ return sorted(best.values(), key=lambda x: x.get('average', x.get('avg', 0)), reverse=True)
uv.lock ADDED
The diff for this file is too large to render. See raw diff