Spaces:
Running
Running
| """Collect memory-on vs memory-off summaries for the SENTINEL proof dashboard. | |
| The training pipeline can be run twice: | |
| outputs/ablation/memory_off/monitoring/latest_summary.json | |
| outputs/ablation/memory_on/monitoring/latest_summary.json | |
| This helper reads those summaries and writes a compact | |
| ``memory_ablation.json`` consumed by ``render_training_dashboard.py``. | |
| It is intentionally lightweight so CI can validate the proof-pack contract | |
| without running training. | |
| """ | |
| from __future__ import annotations | |
| import argparse | |
| import json | |
| from pathlib import Path | |
| from typing import Any, Dict, Iterable, Optional | |
# Caption text stored under the "caption" key of the ablation payload.
DEFAULT_CAPTION = "SENTINEL learns from its own oversight mistakes."
# Summary keys compared between the memory_off and memory_on runs.
DEFAULT_METRICS = ("reward_mean", "detection_rate", "risk_reduction_rate")
def collect_ablation(
    root: str | Path = "outputs/ablation",
    output_path: str | Path = "outputs/monitoring/memory_ablation.json",
) -> Dict[str, Any]:
    """Build the memory-ablation payload and persist it as JSON.

    Loads the ``memory_off`` and ``memory_on`` run summaries found under
    ``root``, compares them on ``DEFAULT_METRICS`` (``memory_off`` is the
    baseline), writes the result to ``output_path`` and returns it.

    Args:
        root: Directory holding the ``memory_off/`` and ``memory_on/`` runs.
        output_path: Destination of the JSON file; missing parent
            directories are created.

    Returns:
        The payload that was written, with the keys ``caption``, ``root``,
        ``runs`` and ``comparison``.
    """
    ablation_dir = Path(root)
    destination = Path(output_path)

    memory_off = _load_run(ablation_dir, "memory_off")
    memory_on = _load_run(ablation_dir, "memory_on")

    payload: Dict[str, Any] = {
        "caption": DEFAULT_CAPTION,
        "root": str(ablation_dir),
        "runs": [memory_off, memory_on],
        "comparison": _compare_summaries(
            memory_off.get("summary", {}),
            memory_on.get("summary", {}),
            DEFAULT_METRICS,
        ),
    }

    destination.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(payload, indent=2, sort_keys=True)
    destination.write_text(serialized, encoding="utf-8")
    return payload
def _load_run(root: Path, label: str) -> Dict[str, Any]:
    """Describe one ablation run located at ``root / label``.

    Returns a dict with the run ``label``, the ``summary_path`` that was
    probed (as a string), the parsed ``summary`` and an ``available`` flag
    that is true only when the parsed summary is non-empty.
    """
    location = root.joinpath(label, "monitoring", "latest_summary.json")
    contents = _read_json(location)
    record: Dict[str, Any] = {"label": label, "summary_path": str(location)}
    record["summary"] = contents
    record["available"] = True if contents else False
    return record
def _compare_summaries(
    baseline: Dict[str, Any],
    candidate: Dict[str, Any],
    metrics: Iterable[str],
) -> Dict[str, Optional[float]]:
    """Compare two summaries metric by metric.

    For every name in ``metrics`` three entries are produced:
    ``<name>_delta`` (candidate minus baseline, rounded to four decimals),
    ``<name>_memory_off`` (the baseline value) and ``<name>_memory_on``
    (the candidate value). A value that cannot be converted to a float is
    reported as ``None``, and so is the delta when either side is ``None``.
    """
    report: Dict[str, Optional[float]] = {}
    for name in metrics:
        off_value = _as_float(baseline.get(name))
        on_value = _as_float(candidate.get(name))
        if off_value is not None and on_value is not None:
            delta: Optional[float] = round(on_value - off_value, 4)
        else:
            delta = None
        report.update(
            {
                f"{name}_delta": delta,
                f"{name}_memory_off": off_value,
                f"{name}_memory_on": on_value,
            }
        )
    return report
def _read_json(path: Path) -> Dict[str, Any]:
    """Load a JSON object from ``path``, returning ``{}`` when it is unusable.

    The read is best-effort: a missing file, a path that cannot be read
    (for example a directory or a permission problem), bytes that are not
    valid UTF-8, malformed JSON, and JSON whose top-level value is not an
    object all yield an empty dict instead of raising.

    Args:
        path: Location of the JSON file.

    Returns:
        The decoded object, or ``{}`` when nothing usable could be read.
    """
    # Read directly instead of checking ``exists()`` first: the file may
    # disappear between the check and the read, and existence does not
    # guarantee readability.
    try:
        text = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        return {}
    try:
        data = json.loads(text)
    except json.JSONDecodeError:
        return {}
    return data if isinstance(data, dict) else {}
def _as_float(value: Any) -> Optional[float]:
    """Convert ``value`` to a float rounded to four decimals, or ``None``.

    ``None`` is returned when the value cannot be converted (wrong type,
    unparsable string, integer too large for a float) or when the result
    is NaN or infinite. Non-finite numbers are rejected because
    ``json.dumps`` would serialize them as ``NaN``/``Infinity``, which
    strict JSON parsers do not accept.

    Args:
        value: Raw metric value, typically read from a summary dict.

    Returns:
        The rounded finite float, or ``None`` when no usable number exists.
    """
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: an int too large to be represented as a float.
        return None
    # NaN is the only float that is not equal to itself.
    if number != number or abs(number) == float("inf"):
        return None
    return round(number, 4)
def main() -> None:
    """Command-line entry point.

    Parses ``--root`` and ``--output``, runs ``collect_ablation`` with
    them and prints the resulting payload as indented, key-sorted JSON.
    """
    cli = argparse.ArgumentParser(description="Collect SENTINEL memory ablation proof data.")
    cli.add_argument(
        "--root",
        default="outputs/ablation",
        help="Directory containing memory_off/ and memory_on/ runs.",
    )
    cli.add_argument(
        "--output",
        default="outputs/monitoring/memory_ablation.json",
        help="Output JSON path for the dashboard renderer.",
    )
    options = cli.parse_args()
    result = collect_ablation(options.root, options.output)
    rendered = json.dumps(result, indent=2, sort_keys=True)
    print(rendered)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()