Spaces:
Running
Running
| """GET /eval, /eval/bootstrap, /eval/leaderboard — research endpoints. | |
| Each endpoint returns a cached eval artifact from ``logs/`` so judges and | |
| researchers can fetch the load-bearing numbers in one HTTP call without | |
| spinning up a GPU. The artifacts themselves are version-pinned in git | |
| (see ``.gitignore`` allow-list). | |
| """ | |
| from __future__ import annotations | |
| import json | |
| from pathlib import Path | |
| from typing import Any | |
| from fastapi import APIRouter, FastAPI, HTTPException | |
# Directory the eval artifacts are read from: ``logs/`` under the parent of
# the directory that contains this file.
LOGS_DIR = Path(__file__).resolve().parents[1] / "logs"
def _load_json(name: str) -> dict[str, Any]:
    """Read and parse the JSON artifact ``LOGS_DIR / name``.

    Args:
        name: File name of the artifact, relative to ``LOGS_DIR``.

    Returns:
        The value decoded from the file by ``json.loads``.

    Raises:
        HTTPException: With status 404 when the file does not exist.
            Other failures (for example invalid JSON) propagate unchanged.
    """
    path = LOGS_DIR / name
    try:
        # Read directly instead of checking ``exists()`` first: a single
        # operation cannot race with the file being removed in between.
        raw = path.read_bytes()
    except FileNotFoundError:
        raise HTTPException(
            status_code=404,
            detail=(
                f"{name} not found at {path}. Run `make eval-v2 && make bootstrap` "
                "or pull the latest artifacts from the repo."
            ),
        ) from None
    # Hand bytes to ``json.loads`` so it detects the encoding itself rather
    # than relying on the platform's locale default.
    return json.loads(raw)
def build_router() -> APIRouter:
    """Build the router that serves the eval artifacts over GET.

    Every handler takes no parameters and returns the parsed content of one
    JSON file loaded through ``_load_json``, which raises a 404
    ``HTTPException`` when that file is missing.

    Returns:
        An ``APIRouter`` tagged ``"eval"`` with one GET route per artifact.
    """
    router = APIRouter(tags=["eval"])

    # The handlers must be registered on the router; defining them alone
    # leaves the returned router without any routes.
    # NOTE(review): "/eval" and "/eval/bootstrap" are the paths named in the
    # module docstring. The remaining four paths are assumptions -- confirm
    # them against the clients of this API before merging.

    @router.get("/eval")
    def get_eval() -> dict[str, Any]:
        """Return the v2 eval artifact (`logs/eval_v2.json`)."""
        return _load_json("eval_v2.json")

    @router.get("/eval/bootstrap")
    def get_bootstrap() -> dict[str, Any]:
        """Return the 10k-iter percentile bootstrap CIs (`logs/bootstrap_v2.json`)."""
        return _load_json("bootstrap_v2.json")

    @router.get("/eval/known-novel")
    def get_known_novel() -> dict[str, Any]:
        """Return the known-vs-novel split eval (`logs/eval_v2_known_novel.json`)."""
        return _load_json("eval_v2_known_novel.json")

    @router.get("/eval/redteam")
    def get_redteam() -> dict[str, Any]:
        """Return the rule-based-analyzer red-team eval."""
        return _load_json("analyzer_robustness.json")

    @router.get("/eval/ttd")
    def get_ttd() -> dict[str, Any]:
        """Return the time-to-detection metric on the env-rollout baseline."""
        return _load_json("time_to_detection.json")

    @router.get("/eval/ablation")
    def get_ablation() -> dict[str, Any]:
        """Return the per-rubric ablation study."""
        return _load_json("ablation_study.json")

    return router
def attach_to_app(app: FastAPI) -> None:
    """Include the router produced by ``build_router`` in *app*."""
    eval_router = build_router()
    app.include_router(eval_router)