Spaces:

modelbuilderhq
/

ghostexec

Sleeping

App Files Files Community

ghostexec / graders.py

modelbuilderhq

Upload folder using huggingface_hub

ee21104 verified 11 days ago

raw

history blame contribute delete

4.21 kB

	"""
	Public trajectory graders for OpenEnv Phase 2 / HF deep validation.

	These are episode-level scores (strictly inside (0, 1)), separate from per-step
	rewards in `server/reward.py`. The hackathon validator reads `openenv.yaml`
	`tasks[].grader` and calls these functions with trajectory dicts.
	"""
	from __future__ import annotations

	import math
	from typing import List

	STRICT_MIN = 0.01
	STRICT_MAX = 0.99


	def _bounded(value: float) -> float:
	try:
	v = round(float(value), 4)
	except (TypeError, ValueError):
	return 0.5
	if not math.isfinite(v):
	return 0.5
	return min(max(v, STRICT_MIN), STRICT_MAX)


	def _as_reward_list(trajectory: dict \| None) -> List[float]:
	payload = trajectory or {}
	if not isinstance(payload, dict):
	return []
	rewards = payload.get("rewards")
	if isinstance(rewards, list) and rewards:
	out: List[float] = []
	for r in rewards:
	try:
	rv = float(r)
	except (TypeError, ValueError):
	continue
	if math.isfinite(rv):
	out.append(rv)
	return out
	if "score" in payload:
	try:
	v = float(payload["score"])
	return [v] if math.isfinite(v) else []
	except (TypeError, ValueError):
	return []
	reward = payload.get("reward")
	if isinstance(reward, dict) and "total" in reward:
	try:
	v = float(reward["total"])
	return [v] if math.isfinite(v) else []
	except (TypeError, ValueError):
	return []
	if reward is not None:
	try:
	v = float(reward)
	return [v] if math.isfinite(v) else []
	except (TypeError, ValueError):
	return []
	return []


	def _profile(reward: float) -> str:
	if reward <= 0.05:
	return "unsafe_miss"
	if reward <= 0.20:
	return "bad_call"
	if reward < 0.50:
	return "weak"
	if reward < 0.80:
	return "workable"
	if reward < 0.95:
	return "strong"
	return "expert"


	def _score_episode(
	    rewards: List[float],
	    *,
	    miss_cost: float,
	    overcall_cost: float,
	    stability_gain: float,
	    expertise_gain: float,
	) -> float:
	    """Turn per-step rewards into one bounded episode score.

	    The score starts from the mean reward, subtracts capped penalties for
	    low-tier steps and adds flat bonuses when enough steps are high-tier.
	    The result is passed through `_bounded`.

	    Args:
	        rewards: Per-step reward values; an empty list scores ``0.5``.
	        miss_cost: Penalty per ``unsafe_miss`` step (total capped at 0.35).
	        overcall_cost: Penalty per ``bad_call`` step (total capped at 0.15).
	        stability_gain: Bonus when at least 80% of steps are ``strong`` or
	            ``expert``.
	        expertise_gain: Bonus when at least 60% of steps are ``expert``.

	    Returns:
	        The bounded episode score.
	    """
	    if not rewards:
	        return _bounded(0.5)

	    total_steps = len(rewards)
	    average = sum(rewards) / total_steps

	    # One pass to count how many steps landed in each tier.
	    tally = {}
	    for step_reward in rewards:
	        tier = _profile(step_reward)
	        tally[tier] = tally.get(tier, 0) + 1

	    expert_steps = tally.get("expert", 0)
	    # "Strong" for the stability bonus means strong-or-better.
	    strong_steps = tally.get("strong", 0) + expert_steps

	    penalty = (
	        min(tally.get("unsafe_miss", 0) * miss_cost, 0.35)
	        + min(tally.get("bad_call", 0) * overcall_cost, 0.15)
	        + min(tally.get("weak", 0) * 0.015, 0.06)  # weak steps: fixed cost
	    )

	    stability_bonus = stability_gain if strong_steps / total_steps >= 0.80 else 0.0
	    expertise_bonus = expertise_gain if expert_steps / total_steps >= 0.60 else 0.0
	    bonus = stability_bonus + expertise_bonus

	    return _bounded(average - penalty + bonus)


	def phase2_core_grader(trajectory: dict | None = None) -> float:
	    """Easy tier — dense default inbox (scenarios/phase2_core.json).

	    Args:
	        trajectory: Episode payload whose rewards are extracted by
	            `_as_reward_list`; ``None`` is handled as "no rewards".

	    Returns:
	        The `_score_episode` result computed with this tier's weights.
	    """
	    return _score_episode(
	        _as_reward_list(trajectory),
	        miss_cost=0.12,
	        overcall_cost=0.03,
	        stability_gain=0.05,
	        expertise_gain=0.01,
	    )


	def monday_morning_grader(trajectory: dict | None = None) -> float:
	    """Medium tier — stacked Monday conflicts (scenarios/monday_morning.json).

	    Args:
	        trajectory: Episode payload whose rewards are extracted by
	            `_as_reward_list`; ``None`` is handled as "no rewards".

	    Returns:
	        The `_score_episode` result computed with this tier's weights.
	    """
	    return _score_episode(
	        _as_reward_list(trajectory),
	        miss_cost=0.09,
	        overcall_cost=0.04,
	        stability_gain=0.03,
	        expertise_gain=0.02,
	    )


	def dinner_disaster_grader(trajectory: dict | None = None) -> float:
	    """Hard tier — personal/professional collision (scenarios/dinner_disaster.json).

	    Args:
	        trajectory: Episode payload whose rewards are extracted by
	            `_as_reward_list`; ``None`` is handled as "no rewards".

	    Returns:
	        The `_score_episode` result computed with this tier's weights.
	    """
	    return _score_episode(
	        _as_reward_list(trajectory),
	        miss_cost=0.07,
	        overcall_cost=0.03,
	        stability_gain=0.02,
	        expertise_gain=0.04,
	    )


	# Public API: the three tier graders plus the clamp limits their scores
	# are bounded by.
	__all__ = [
	"phase2_core_grader",
	"monday_morning_grader",
	"dinner_disaster_grader",
	"STRICT_MIN",
	"STRICT_MAX",
	]