# adithya9903's picture
# Deploy PolyGuard HF training Space
# fd0c71a verified
"""Explanation grounding evaluation."""
from __future__ import annotations
from app.evaluation.simulator_rollouts import run_rollouts
def explainability_eval() -> dict[str, float]:
    """Compute the mean explanation-grounding score over simulator rollouts.

    Calls ``run_rollouts`` for 8 episodes at ``"medium"`` difficulty and reads
    ``reward_breakdown["explanation_grounding_score"]`` from every returned
    row. A row whose ``reward_breakdown`` is missing or falsy, or which lacks
    the score key, contributes ``0.0``.

    Returns:
        A single-entry dict ``{"grounding_rate": <mean>}`` where the mean is
        rounded to 6 decimal places; ``0.0`` when no rows were produced.
    """
    rollout_rows = run_rollouts(episodes=8, difficulty="medium")
    if not rollout_rows:
        return {"grounding_rate": 0.0}

    scores: list[float] = []
    for entry in rollout_rows:
        # ``or {}`` covers a reward_breakdown that is present but None/empty.
        breakdown = entry.get("reward_breakdown", {}) or {}
        scores.append(float(breakdown.get("explanation_grounding_score", 0.0)))

    # The divisor is clamped to at least 1 so the division can never be by zero.
    mean_score = sum(scores) / max(1, len(scores))
    return {"grounding_rate": round(mean_score, 6)}