Elliot89's picture
Initial commit (from agent)
a50dd28
"""
server/models.py — Typed Pydantic v2 models for the OpenEnv interface.
Implements the full OpenEnv spec:
- Action: typed action with parameters
- Observation: full environment state visible to the agent
- Reward: score + reason + cumulative (with backward-compatible 'value' alias)
- EpisodeState: internal state for GET /state
"""
from __future__ import annotations
from pydantic import BaseModel, Field, computed_field
class ActionParameters(BaseModel):
    """Open-ended bag of optional parameters attached to an action.

    Different action types populate different fields, so every declared
    field is optional and falls back to ``None`` when omitted. Keys that are
    not declared here are accepted too, because the model is configured
    with ``extra="allow"``.
    """

    # Keep undeclared keys instead of rejecting them.
    model_config = {"extra": "allow"}

    # All parameters are optional; an action supplies only what it needs.
    service: str | None = None
    severity: str | None = None
    failure_mode: str | None = None
    summary: str | None = None
    target_version: str | None = None
    replicas: int | None = None
    flag: str | None = None
    runbook_action: str | None = None
    target: str | None = None
    reasoning: str | None = None
class Action(BaseModel):
    """A single action the agent submits to the environment.

    Attributes:
        action_type: Name of the action to perform; expected to be one of
            the valid action types (query_logs, check_metrics, etc.).
        parameters: Parameters specific to that action. When none are
            supplied, a fresh ``ActionParameters`` instance is created.
    """

    # Keep undeclared top-level keys instead of rejecting them.
    model_config = {"extra": "allow"}

    action_type: str
    parameters: ActionParameters = Field(default_factory=ActionParameters)
class Observation(BaseModel):
    """Snapshot handed back to the agent by reset() or step().

    Carries everything the agent is allowed to see at the current point of
    the episode.

    Attributes:
        episode_id: UUID that uniquely identifies the episode.
        task_id: Identifier of the active task.
        scenario_id: Identifier of the scenario currently in play.
        step_count: How many steps have been taken so far.
        max_steps: Upper bound on the number of steps.
        incident_summary: Incident description in human-readable form.
        alert: Alert payload (severity, symptoms, affected services).
        available_actions: Action types that are valid for this task.
        queried_data: Evidence gathered so far, i.e. every tool response.
        cumulative_reward: Reward accumulated up to this point.
        done: True once the episode has ended.
        feedback: Feedback text for the latest step.
        known_services: Exact service names that actions may reference;
            defaults to an empty list.
        last_action_error: Message explaining why the last action was
            invalid, or ``None`` when it was accepted.
    """

    # Episode identity.
    episode_id: str
    task_id: str
    scenario_id: str

    # Step budget.
    step_count: int
    max_steps: int

    # Incident context presented to the agent.
    incident_summary: str
    alert: dict
    available_actions: list[str]
    queried_data: dict

    # Progress and per-step feedback.
    cumulative_reward: float
    done: bool
    feedback: str

    # Optional extras; both may be omitted by the producer.
    known_services: list[str] = Field(default_factory=list)
    last_action_error: str | None = None
class Reward(BaseModel):
    """Reward signal produced for each step().

    ``score`` is the primary field and holds the actual reward value.
    ``value`` is a computed alias of ``score`` that is kept for backward
    compatibility with OpenEnv validators.

    Attributes:
        score: Reward earned by this step.
        reason: Human-readable explanation of why the reward was given.
        cumulative: Running total of all rewards in the episode.
    """

    score: float
    reason: str
    cumulative: float

    # Declared as a computed field so the alias is included when the model
    # is serialized, not only available as an attribute.
    @computed_field
    @property
    def value(self) -> float:
        """Return *score* under its backward-compatible name."""
        return self.score
class StepResult(BaseModel):
    """Payload returned by POST /step, shaped to match the OpenEnv spec.

    Attributes:
        observation: Observation produced by the step.
        reward: Reward produced by the step.
        done: Whether the episode has ended.
        info: Free-form extra information; defaults to an empty dict.
    """

    observation: Observation
    reward: Reward
    done: bool
    info: dict = Field(default_factory=dict)
class EpisodeState(BaseModel):
    """Complete episode state served by GET /state.

    Includes internal bookkeeping that is not shown to agents directly.
    Every field is required; none has a default.
    """

    # Episode identity.
    episode_id: str
    task_id: str
    scenario_id: str

    # Step budget.
    step_count: int
    max_steps: int

    # Internal bookkeeping.
    # NOTE(review): presumably one dict per action taken — confirm against
    # the code that populates this model.
    action_history: list[dict]
    queried_data: dict

    # Status flags.
    # NOTE(review): the exact meaning of `submitted` and `resolved` is set by
    # the environment logic, which is not visible here — confirm there.
    submitted: bool
    resolved: bool
    done: bool

    # Reward total and feedback text.
    cumulative_reward: float
    feedback: str