File size: 3,874 Bytes
77eb356
a50dd28
77eb356
a50dd28
 
 
 
 
77eb356
 
 
a50dd28
 
77eb356
 
 
 
a50dd28
77eb356
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a50dd28
 
 
 
 
 
 
77eb356
 
 
 
 
 
 
a50dd28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77eb356
 
 
 
 
 
 
 
 
 
 
 
 
a50dd28
77eb356
 
 
a50dd28
 
 
 
 
 
 
 
 
 
 
 
77eb356
 
 
a50dd28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77eb356
 
a50dd28
 
 
 
 
77eb356
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
"""
server/models.py — Typed Pydantic v2 models for the OpenEnv interface.

Implements the full OpenEnv spec:
  - Action: typed action with parameters
  - Observation: full environment state visible to the agent
  - Reward: score + reason + cumulative (with backward-compatible 'value' alias)
  - EpisodeState: internal state for GET /state
"""

from __future__ import annotations

from pydantic import BaseModel, Field, computed_field


class ActionParameters(BaseModel):
    """Optional parameters that may accompany an action.

    Every declared field defaults to ``None``, so a caller supplies only the
    ones relevant to the action being submitted.  Keys that are not declared
    here are accepted too, because the model is configured with
    ``extra="allow"``.
    """

    # Accept undeclared keys instead of rejecting them.
    model_config = dict(extra="allow")

    service: str | None = Field(default=None)
    severity: str | None = Field(default=None)
    failure_mode: str | None = Field(default=None)
    summary: str | None = Field(default=None)
    target_version: str | None = Field(default=None)
    replicas: int | None = Field(default=None)
    flag: str | None = Field(default=None)
    runbook_action: str | None = Field(default=None)
    target: str | None = Field(default=None)
    reasoning: str | None = Field(default=None)


class Action(BaseModel):
    """A single action sent by the agent to the environment.

    Attributes:
        action_type: Name of the action to perform (e.g. query_logs,
            check_metrics).  Declared as a plain string; this model does not
            restrict it to a fixed set of values.
        parameters: Action-specific parameters.  When omitted, a fresh
            ``ActionParameters`` instance with all fields unset is used.
    """

    # Accept undeclared top-level keys instead of rejecting them.
    model_config = dict(extra="allow")

    action_type: str
    parameters: ActionParameters = Field(default_factory=ActionParameters)


class Observation(BaseModel):
    """Observation returned after reset() or step().

    Contains all information visible to the agent at this point in the episode.
    Every field is required except ``known_services`` and
    ``last_action_error``, which have defaults.

    Attributes:
        episode_id: Unique episode UUID
        task_id: Active task identifier
        scenario_id: Current scenario identifier
        step_count: Number of steps taken so far
        max_steps: Maximum steps allowed
        incident_summary: Human-readable incident description
        alert: Alert payload with severity, symptoms, affected services
        available_actions: List of valid action types for this task
        queried_data: All tool responses gathered so far (evidence)
        cumulative_reward: Running reward total
        done: Whether the episode has ended
        feedback: Per-step feedback string
        known_services: Exact service names valid for actions
            (defaults to a new empty list)
        last_action_error: Error message if last action was invalid (None if OK)
    """

    episode_id: str
    task_id: str
    scenario_id: str
    step_count: int
    max_steps: int
    incident_summary: str
    # Untyped dict: the key schema of the alert payload is not enforced here.
    alert: dict
    available_actions: list[str]
    # Untyped dict: the shape of the gathered evidence is not enforced here.
    queried_data: dict
    cumulative_reward: float
    done: bool
    feedback: str
    # default_factory gives each instance its own list.
    known_services: list[str] = Field(default_factory=list)
    last_action_error: str | None = None


class Reward(BaseModel):
    """Reward signal returned after each step().

    Primary field is ``score`` (the actual reward value).
    ``value`` is a computed alias for backward compatibility with OpenEnv validators.
    It is a read-only property derived from ``score``, not a separately
    stored field.

    Attributes:
        score: The reward value for this step
        reason: Human-readable explanation of the reward
        cumulative: Running total of all rewards in the episode
    """

    score: float
    reason: str
    cumulative: float

    # computed_field registers the property with Pydantic so that it is
    # treated as part of the model rather than as a plain Python property.
    @computed_field
    @property
    def value(self) -> float:
        """Backward-compatible alias for *score*; always returns ``self.score``."""
        return self.score


class StepResult(BaseModel):
    """Result returned by POST /step — matches OpenEnv spec.

    Attributes:
        observation: The observation produced by the step
        reward: The reward produced by the step
        done: Whether the episode has ended
        info: Free-form extra data; defaults to a new empty dict
    """

    observation: Observation
    reward: Reward
    # NOTE(review): Observation also carries its own ``done`` flag; nothing in
    # this model enforces that the two agree — confirm the caller keeps them in sync.
    done: bool
    # default_factory gives each instance its own dict.
    info: dict = Field(default_factory=dict)


class EpisodeState(BaseModel):
    """Full episode state returned by GET /state.

    Contains internal bookkeeping not shown to agents directly.
    All fields are required; none has a default.

    Attributes:
        episode_id: Episode identifier
        task_id: Task identifier
        scenario_id: Scenario identifier
        step_count: Step counter for the episode
        max_steps: Step limit for the episode
        action_history: List of dicts, presumably one per action taken —
            the dict schema is not defined in this module (TODO confirm)
        queried_data: Untyped dict; presumably the same evidence store
            exposed on ``Observation.queried_data`` (TODO confirm)
        submitted: Boolean flag; exact meaning is set by the caller
            (TODO confirm)
        resolved: Boolean flag; exact meaning is set by the caller
            (TODO confirm)
        done: Whether the episode has ended
        cumulative_reward: Running reward total
        feedback: Feedback string
    """

    episode_id: str
    task_id: str
    scenario_id: str
    step_count: int
    max_steps: int
    action_history: list[dict]
    queried_data: dict
    submitted: bool
    resolved: bool
    done: bool
    cumulative_reward: float
    feedback: str