Spaces:

TheJackBright
/

polyguard-openenv

Running

Deploy PolyGuard OpenEnv Space

877add7 verified 13 days ago

796 Bytes

	"""Dosing-specific evaluation."""

	from __future__ import annotations

	from app.evaluation.simulator_rollouts import run_rollouts


	def dosing_eval() -> dict[str, float]:
	    """Summarise dosing performance over a fixed batch of simulator rollouts.

	    Requests 8 rollouts at ``"hard"`` difficulty in the ``PRECISION_DOSING``
	    sub-environment and reduces the returned rows to two metrics.

	    Returns:
	        A dict with two entries, each rounded to 6 decimal places:

	        * ``"target_attainment"`` -- mean of ``dosing_quality_score`` read
	          from each row's ``reward_breakdown`` mapping; a missing or falsy
	          breakdown, or a missing score key, counts as ``0.0``.
	        * ``"toxicity_avoidance"`` -- fraction of rows whose ``"legal"``
	          entry is truthy (a missing entry counts as not legal).

	        Both values are ``0.0`` when the rollout call yields no rows.
	    """
	    episodes = run_rollouts(
	        episodes=8,
	        difficulty="hard",
	        sub_environment="PRECISION_DOSING",
	    )
	    if not episodes:
	        return {"target_attainment": 0.0, "toxicity_avoidance": 0.0}

	    # Pull one quality score per episode; ``or {}`` covers a breakdown that
	    # is present but None/empty.
	    quality_scores = []
	    for episode in episodes:
	        breakdown = episode.get("reward_breakdown", {}) or {}
	        quality_scores.append(float(breakdown.get("dosing_quality_score", 0.0)))
	    mean_quality = sum(quality_scores) / max(1, len(quality_scores))

	    # Each episode with a truthy "legal" flag contributes 1.0 to the tally.
	    legal_total = sum(
	        1.0 for episode in episodes if bool(episode.get("legal", False))
	    )
	    legal_fraction = legal_total / len(episodes)

	    summary = {
	        "target_attainment": round(mean_quality, 6),
	        "toxicity_avoidance": round(legal_fraction, 6),
	    }
	    return summary