Spaces:
Running
Running
| """ | |
| Abstract base grader interface. | |
| All task graders must inherit from this and implement score(). | |
| """ | |
| from __future__ import annotations | |
| from abc import ABC, abstractmethod | |
| from server.models import EpisodeState | |
| class BaseGrader(ABC): | |
| """ | |
| Abstract grader base class. | |
| A grader evaluates the complete episode history and produces | |
| a final score in [0.0, 1.0]. | |
| Unlike the reward function (which fires after every step), | |
| the grader fires once at episode end and produces the | |
| official score used by judges. | |
| """ | |
| def score(self, state: EpisodeState) -> float: | |
| """ | |
| Score the completed episode. | |
| Args: | |
| state: Final EpisodeState including full action_history | |
| Returns: | |
| float in [0.0, 1.0] — the official episode score | |
| """ | |
| raise NotImplementedError | |
| def _clamp(self, value: float) -> float: | |
| """Clamp score to valid range (0.0, 1.0) — strictly between 0 and 1.""" | |
| return round(max(0.0001, min(0.9999, value)), 4) | |
| def _get_actions_of_type( | |
| self, state: EpisodeState, action_type: str | |
| ) -> list[dict]: | |
| """Return all actions of a given type from episode history.""" | |
| return [ | |
| a for a in state.action_history | |
| if a.get("action_type") == action_type | |
| ] | |
| def _was_action_taken(self, state: EpisodeState, action_type: str) -> bool: | |
| """Check if an action type was taken at any point in the episode.""" | |
| return any( | |
| a.get("action_type") == action_type | |
| for a in state.action_history | |
| ) | |
| def _get_first_value( | |
| self, state: EpisodeState, action_type: str | |
| ) -> str | None: | |
| """Get the value of the first action of a given type.""" | |
| actions = self._get_actions_of_type(state, action_type) | |
| return actions[0].get("value") if actions else None | |
| def _episode_resolved(self, state: EpisodeState) -> bool: | |
| """Check if agent explicitly resolved the episode.""" | |
| return self._was_action_taken(state, "resolve") | |
| def _steps_used(self, state: EpisodeState) -> int: | |
| """Return number of steps taken.""" | |
| return state.step_count | |