File size: 2,235 Bytes
fbb0927
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96894d9
 
fbb0927
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
"""
Abstract base grader interface.
All task graders must inherit from this and implement score().
"""
from __future__ import annotations
from abc import ABC, abstractmethod
from server.models import EpisodeState


class BaseGrader(ABC):
    """
    Abstract grader base class.

    A grader evaluates the complete episode history and produces
    a final score in [0.0, 1.0].

    Unlike the reward function (which fires after every step),
    the grader fires once at episode end and produces the
    official score used by judges.

    Subclasses must implement score(). The underscore-prefixed helpers
    below are conveniences for inspecting ``state.action_history``
    (treated as a sequence of dict-like actions) and for normalising
    the final score via _clamp().
    """

    # Bounds applied by _clamp(): scores are kept strictly inside (0, 1).
    _MIN_SCORE = 0.0001
    _MAX_SCORE = 0.9999

    @abstractmethod
    def score(self, state: EpisodeState) -> float:
        """
        Score the completed episode.

        Args:
            state: Final EpisodeState including full action_history

        Returns:
            float in [0.0, 1.0] — the official episode score
        """
        raise NotImplementedError

    def _clamp(self, value: float) -> float:
        """
        Clamp a raw score into [0.0001, 0.9999], rounded to 4 decimals.

        The result is therefore always strictly between 0 and 1.

        Args:
            value: Raw score; may lie outside the valid range.

        Returns:
            The clamped, rounded score. NaN maps to the lower bound.
        """
        import math

        # min()/max() silently keep their first argument when compared
        # against NaN, which would turn an invalid score into the top
        # score. Treat NaN as the worst possible score instead.
        if math.isnan(value):
            return self._MIN_SCORE
        bounded = max(self._MIN_SCORE, min(self._MAX_SCORE, value))
        return round(bounded, 4)

    def _get_actions_of_type(
        self, state: EpisodeState, action_type: str
    ) -> list[dict]:
        """
        Return all actions of a given type from episode history.

        Args:
            state: Episode state whose action_history is scanned.
            action_type: Value compared against each action's
                "action_type" entry.

        Returns:
            Matching actions, in history order (empty list if none).
        """
        return [
            a for a in state.action_history
            if a.get("action_type") == action_type
        ]

    def _was_action_taken(self, state: EpisodeState, action_type: str) -> bool:
        """Check if an action type was taken at any point in the episode."""
        return any(
            a.get("action_type") == action_type
            for a in state.action_history
        )

    def _get_first_value(
        self, state: EpisodeState, action_type: str
    ) -> str | None:
        """
        Get the "value" entry of the first action of a given type.

        Returns:
            The first matching action's "value", or None when no action
            of that type exists or the action has no "value" entry.
        """
        # Stop at the first match instead of materialising every match.
        first = next(
            (
                a for a in state.action_history
                if a.get("action_type") == action_type
            ),
            None,
        )
        return first.get("value") if first is not None else None

    def _episode_resolved(self, state: EpisodeState) -> bool:
        """Check if the history contains at least one "resolve" action."""
        return self._was_action_taken(state, "resolve")

    def _steps_used(self, state: EpisodeState) -> int:
        """Return number of steps taken (state.step_count)."""
        return state.step_count