Spaces:
Running
Running
| """ | |
| base_env.py | |
| ----------- | |
| Abstract base class that every task environment must implement. | |
| Follows the OpenEnv interface: reset / step / state. | |
| """ | |
| from abc import ABC, abstractmethod | |
| from typing import Any, Dict | |
| from env.schemas import Observation, Action, StepResult, ResetResult, StateResult | |
class BaseEnv(ABC):
    """
    OpenEnv-compliant base environment.

    Concrete task environments must subclass this and implement:

    - reset() -> ResetResult
    - step()  -> StepResult
    - state() -> StateResult

    These three methods are declared abstract, so neither this class nor a
    subclass that leaves one of them unimplemented can be instantiated.
    """

    @abstractmethod
    def reset(self, seed: int | None = None) -> ResetResult:
        """
        Reset the environment to a fresh episode.

        Parameters
        ----------
        seed : optional RNG seed for reproducibility

        Returns
        -------
        ResetResult with the initial Observation and episode info.
        """
        ...

    @abstractmethod
    def step(self, action: Action) -> StepResult:
        """
        Apply an action and advance the episode by one step.

        Parameters
        ----------
        action : typed agent action

        Returns
        -------
        StepResult containing:

        - observation : updated Observation
        - reward      : Reward for this step
        - done        : True when the episode is over
        - info        : auxiliary diagnostic information
        """
        ...

    @abstractmethod
    def state(self) -> StateResult:
        """
        Return the full internal state (for debugging / graders).

        Should NOT be used by the agent during evaluation.

        Returns
        -------
        StateResult holding the internal episode state snapshot.
        """
        ...

    # ------------------------------------------------------------------
    # Optional helpers subclasses may override
    # ------------------------------------------------------------------

    def render(self) -> str:
        """
        Return a one-line, human-readable summary of the current state.

        The summary is built from the ``task_id``, ``contract_name``,
        ``step_count``, ``cumulative_reward`` (two decimals) and ``done``
        attributes of the object returned by ``state()``.
        """
        s = self.state()
        return (
            f"Task: {s.task_id} | Contract: {s.contract_name} | "
            f"Step: {s.step_count} | Reward: {s.cumulative_reward:.2f} | "
            f"Done: {s.done}"
        )

    def action_space_description(self) -> Dict[str, Any]:
        """
        Return a JSON-serialisable description of the action space.

        The base implementation returns an empty dict.
        """
        return {}

    def observation_space_description(self) -> Dict[str, Any]:
        """
        Return a JSON-serialisable description of the observation space.

        The base implementation returns an empty dict.
        """
        return {}