Spaces:
Running
Running
File size: 2,235 Bytes
fbb0927 96894d9 fbb0927 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 | """
Abstract base grader interface.
All task graders must inherit from this and implement score().
"""
from __future__ import annotations

import math
from abc import ABC, abstractmethod

from server.models import EpisodeState
class BaseGrader(ABC):
    """
    Abstract grader base class.

    A grader evaluates the complete episode history and produces
    a final score in [0.0, 1.0].

    Unlike the reward function (which fires after every step),
    the grader fires once at episode end and produces the
    official score used by judges.

    Subclasses must implement score(). The underscore-prefixed helpers
    are shared conveniences that read state.action_history (iterated as
    dict-like entries via .get()) and state.step_count.
    """

    @abstractmethod
    def score(self, state: EpisodeState) -> float:
        """
        Score the completed episode.

        Args:
            state: Final EpisodeState including full action_history

        Returns:
            float in [0.0, 1.0] — the official episode score
        """
        raise NotImplementedError

    def _clamp(self, value: float) -> float:
        """
        Clamp a raw score into the open interval (0.0, 1.0).

        Args:
            value: Raw score to clamp.

        Returns:
            The value limited to [0.0001, 0.9999] and rounded to 4
            decimal places. NaN is mapped to the lower bound, 0.0001.
        """
        # Every comparison with NaN is false, so min(0.9999, nan) would
        # return 0.9999 and an undefined score would be reported as a
        # near-perfect one. Treat NaN as the worst possible score instead.
        if math.isnan(value):
            return 0.0001
        return round(max(0.0001, min(0.9999, value)), 4)

    def _get_actions_of_type(
        self, state: EpisodeState, action_type: str
    ) -> list[dict]:
        """
        Return all actions of a given type from episode history.

        Args:
            state: Episode state whose action_history is searched.
            action_type: Value to match against each entry's
                "action_type" key.

        Returns:
            Matching entries in their original order; empty if none match.
        """
        return [
            action
            for action in state.action_history
            if action.get("action_type") == action_type
        ]

    def _was_action_taken(self, state: EpisodeState, action_type: str) -> bool:
        """Check if an action type was taken at any point in the episode."""
        return any(
            action.get("action_type") == action_type
            for action in state.action_history
        )

    def _get_first_value(
        self, state: EpisodeState, action_type: str
    ) -> str | None:
        """
        Get the value of the first action of a given type.

        Returns:
            The "value" entry of the first matching action, or None when
            no action matches or the first match has no "value" key.
        """
        actions = self._get_actions_of_type(state, action_type)
        return actions[0].get("value") if actions else None

    def _episode_resolved(self, state: EpisodeState) -> bool:
        """Check if the history contains at least one "resolve" action."""
        return self._was_action_taken(state, "resolve")

    def _steps_used(self, state: EpisodeState) -> int:
        """Return number of steps taken (state.step_count)."""
        return state.step_count
|