Spaces:
Running
Running
File size: 2,290 Bytes
"""GET /eval and its sub-routes — research endpoints.

Routes served: /eval, /eval/bootstrap, /eval/known-novel, /eval/redteam,
/eval/time-to-detection and /eval/ablation.

Each endpoint returns a cached eval artifact from ``logs/`` so judges and
researchers can fetch the load-bearing numbers in one HTTP call without
spinning up a GPU. The artifacts themselves are version-pinned in git
(see ``.gitignore`` allow-list).
"""
from __future__ import annotations
import json
from pathlib import Path
from typing import Any
from fastapi import APIRouter, FastAPI, HTTPException
# Directory holding the cached eval artifacts: the "logs" folder that sits
# next to this module's parent directory (two levels up from this file).
LOGS_DIR = Path(__file__).resolve().parent.parent.joinpath("logs")
def _load_json(name: str) -> dict[str, Any]:
    """Read and parse one JSON artifact stored under ``LOGS_DIR``.

    Args:
        name: File name of the artifact, resolved relative to ``LOGS_DIR``.

    Returns:
        The parsed JSON content. The top-level type is not validated here;
        the annotation reflects what the endpoints declare.

    Raises:
        HTTPException: With status 404 when the artifact file is missing.
        json.JSONDecodeError: When the file exists but is not valid JSON.
    """
    path = LOGS_DIR / name
    try:
        # Read directly instead of exists()-then-read, so a file that
        # disappears between the check and the read still yields the 404.
        # JSON is UTF-8 by specification; do not rely on the locale default.
        raw = path.read_text(encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError):
        # NotADirectoryError is included because Path.exists() reported such
        # paths as missing, so they keep mapping to a 404.
        raise HTTPException(
            status_code=404,
            detail=(
                f"{name} not found at {path}. Run `make eval-v2 && make bootstrap` "
                "or pull the latest artifacts from the repo."
            ),
        ) from None
    return json.loads(raw)
def build_router() -> APIRouter:
    """Assemble the router exposing the cached eval artifacts.

    Each route is a GET handler that hands a fixed artifact file name to
    ``_load_json`` and returns whatever that helper produces.

    Returns:
        A new ``APIRouter`` tagged ``"eval"`` with six GET routes registered.
    """
    eval_router = APIRouter(tags=["eval"])

    # NOTE: handler names and docstrings are kept verbatim on purpose —
    # FastAPI surfaces them in the generated OpenAPI schema.

    @eval_router.get("/eval")
    def get_eval() -> dict[str, Any]:
        """Return the v2 eval artifact (`logs/eval_v2.json`)."""
        return _load_json("eval_v2.json")

    @eval_router.get("/eval/bootstrap")
    def get_bootstrap() -> dict[str, Any]:
        """Return the 10k-iter percentile bootstrap CIs (`logs/bootstrap_v2.json`)."""
        return _load_json("bootstrap_v2.json")

    @eval_router.get("/eval/known-novel")
    def get_known_novel() -> dict[str, Any]:
        """Return the known-vs-novel split eval (`logs/eval_v2_known_novel.json`)."""
        return _load_json("eval_v2_known_novel.json")

    @eval_router.get("/eval/redteam")
    def get_redteam() -> dict[str, Any]:
        """Return the rule-based-analyzer red-team eval."""
        return _load_json("analyzer_robustness.json")

    @eval_router.get("/eval/time-to-detection")
    def get_ttd() -> dict[str, Any]:
        """Return the time-to-detection metric on the env-rollout baseline."""
        return _load_json("time_to_detection.json")

    @eval_router.get("/eval/ablation")
    def get_ablation() -> dict[str, Any]:
        """Return the per-rubric ablation study."""
        return _load_json("ablation_study.json")

    return eval_router
def attach_to_app(app: FastAPI) -> None:
    """Mount the eval routes onto *app*.

    Args:
        app: The application that receives a freshly built eval router.
    """
    eval_router = build_router()
    app.include_router(eval_router)
|