File size: 3,023 Bytes
d814291
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
from __future__ import annotations

import json
from datetime import datetime, timezone
from pathlib import Path
from typing import Any


def _utc_now() -> str:
    return datetime.now(tz=timezone.utc).replace(microsecond=0).isoformat()


def load_leaderboard(path: str | Path) -> list[dict[str, Any]]:
    file_path = Path(path)
    if not file_path.exists():
        return []
    with file_path.open("r", encoding="utf-8") as f:
        data = json.load(f)
    if not isinstance(data, list):
        return []
    return data


def save_leaderboard(path: str | Path, records: list[dict[str, Any]]) -> None:
    file_path = Path(path)
    file_path.parent.mkdir(parents=True, exist_ok=True)
    with file_path.open("w", encoding="utf-8") as f:
        json.dump(records, f, indent=2, sort_keys=True)


def _metric_value(record: dict[str, Any], sort_by: str) -> float:
    metrics = record.get("metrics", {})
    return float(metrics.get(sort_by, 0.0))


def sorted_leaderboard(records: list[dict[str, Any]], sort_by: str = "leaderboard_score") -> list[dict[str, Any]]:
    return sorted(records, key=lambda r: _metric_value(r, sort_by), reverse=True)


def append_leaderboard_record(
    path: str | Path,
    summary: dict[str, Any],
    episodes: int,
    run_name: str | None = None,
    config: dict[str, Any] | None = None,
) -> dict[str, Any]:
    records = load_leaderboard(path)
    run_id = f"run_{len(records) + 1:04d}"
    record = {
        "run_id": run_id,
        "run_name": run_name or run_id,
        "created_at": _utc_now(),
        "episodes": int(episodes),
        "config": config or {},
        "metrics": summary,
    }
    records.append(record)
    save_leaderboard(path, records)
    return record


def render_leaderboard_table(records: list[dict[str, Any]], top_k: int = 20, sort_by: str = "leaderboard_score") -> str:
    ranked = sorted_leaderboard(records, sort_by=sort_by)[:top_k]
    header = "| rank | run | score | success | graph_f1 | retrieval | structural | spawn | reward | tool_eff |\n"
    sep = "|---|---|---:|---:|---:|---:|---:|---:|---:|---:|\n"
    rows: list[str] = []
    for idx, rec in enumerate(ranked, start=1):
        m = rec.get("metrics", {})
        rows.append(
            "| {rank} | {run} | {score:.4f} | {succ:.3f} | {f1:.3f} | {retrieval:.3f} | {structural:.3f} | {spawn:.3f} | {reward:.3f} | {tool:.3f} |".format(
                rank=idx,
                run=rec.get("run_name", rec.get("run_id", "run")),
                score=float(m.get("leaderboard_score", 0.0)),
                succ=float(m.get("task_success_rate", 0.0)),
                f1=float(m.get("avg_graph_f1", 0.0)),
                retrieval=float(m.get("retrieval_signal", 0.0)),
                structural=float(m.get("structural_signal", 0.0)),
                spawn=float(m.get("spawn_signal", 0.0)),
                reward=float(m.get("avg_reward", 0.0)),
                tool=float(m.get("tool_efficiency", 0.0)),
            )
        )
    return header + sep + "\n".join(rows)