File size: 2,290 Bytes
03815d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
"""Research endpoints: GET /eval and its /eval/* siblings.

Routes: /eval, /eval/bootstrap, /eval/known-novel, /eval/redteam,
/eval/time-to-detection, /eval/ablation.

Each endpoint returns a cached eval artifact from ``logs/`` so judges and
researchers can fetch the load-bearing numbers in one HTTP call without
spinning up a GPU. The artifacts themselves are version-pinned in git
(see ``.gitignore`` allow-list).
"""

from __future__ import annotations

import json
from pathlib import Path
from typing import Any

from fastapi import APIRouter, FastAPI, HTTPException

# Directory of cached eval artifacts: ``logs/`` inside the directory one level
# above the one that contains this module.
LOGS_DIR = Path(__file__).resolve().parent.parent.joinpath("logs")


def _load_json(name: str) -> dict[str, Any]:
    """Read and parse the eval artifact ``name`` from ``LOGS_DIR``.

    Args:
        name: File name of the artifact, relative to ``LOGS_DIR``
            (for example ``"eval_v2.json"``).

    Returns:
        The decoded JSON document (annotated as a dict; the shape of the
        artifact itself is not validated here).

    Raises:
        HTTPException: With status 404 when the artifact file is missing.
        json.JSONDecodeError: When the file exists but is not valid JSON.
    """
    path = LOGS_DIR / name
    try:
        # EAFP: read directly instead of exists()-then-read, so a file removed
        # between the check and the read still yields the 404 below.
        # JSON is UTF-8; do not depend on the platform's default encoding.
        raw = path.read_text(encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError):
        # Same cases for which Path.exists() would have returned False.
        raise HTTPException(
            status_code=404,
            detail=(
                f"{name} not found at {path}. Run `make eval-v2 && make bootstrap` "
                "or pull the latest artifacts from the repo."
            ),
        ) from None
    return json.loads(raw)


def build_router() -> APIRouter:
    """Build the router exposing the cached eval artifacts as GET endpoints.

    Every handler takes no parameters and returns the parsed contents of one
    JSON file under ``LOGS_DIR`` via ``_load_json``, which raises a 404
    ``HTTPException`` when that file does not exist. The file is read from
    disk on each call; nothing is cached in this function.

    Returns:
        A new ``APIRouter`` tagged ``"eval"`` with six GET routes registered.
    """
    router = APIRouter(tags=["eval"])

    # /eval -> eval_v2.json
    @router.get("/eval")
    def get_eval() -> dict[str, Any]:
        """Return the v2 eval artifact (`logs/eval_v2.json`)."""
        return _load_json("eval_v2.json")

    # /eval/bootstrap -> bootstrap_v2.json
    @router.get("/eval/bootstrap")
    def get_bootstrap() -> dict[str, Any]:
        """Return the 10k-iter percentile bootstrap CIs (`logs/bootstrap_v2.json`)."""
        return _load_json("bootstrap_v2.json")

    # /eval/known-novel -> eval_v2_known_novel.json
    @router.get("/eval/known-novel")
    def get_known_novel() -> dict[str, Any]:
        """Return the known-vs-novel split eval (`logs/eval_v2_known_novel.json`)."""
        return _load_json("eval_v2_known_novel.json")

    # /eval/redteam -> analyzer_robustness.json (route and file names differ)
    @router.get("/eval/redteam")
    def get_redteam() -> dict[str, Any]:
        """Return the rule-based-analyzer red-team eval."""
        return _load_json("analyzer_robustness.json")

    # /eval/time-to-detection -> time_to_detection.json
    @router.get("/eval/time-to-detection")
    def get_ttd() -> dict[str, Any]:
        """Return the time-to-detection metric on the env-rollout baseline."""
        return _load_json("time_to_detection.json")

    # /eval/ablation -> ablation_study.json
    @router.get("/eval/ablation")
    def get_ablation() -> dict[str, Any]:
        """Return the per-rubric ablation study."""
        return _load_json("ablation_study.json")

    return router


def attach_to_app(app: FastAPI) -> None:
    """Register the eval router (from ``build_router``) on ``app``."""
    eval_router = build_router()
    app.include_router(eval_router)