| """Episode runner for multi-agent rollouts. |
| |
| Used for: |
| |
| * Generating baseline reward curves before LLM training. |
| * Producing trajectories TRL can consume as (observation, action, reward). |
| * Driving the dashboard demo — each ``EpisodeStep`` is a renderable frame. |
| |
| The runner is policy-agnostic: pass any ``Policy`` (scripted or an LLM-wrapped |
| adapter) and it drives the round-robin turn order until the episode ends. |
| """ |
|
|
| from __future__ import annotations |
|
|
| from dataclasses import dataclass, field |
|
|
| from chaosops.agents.policies import Policy |
| from chaosops.env.environment import ChaosOpsEnvironment |
| from chaosops.env.models import ( |
| AgentRole, |
| ChaosOpsAction, |
| ChaosOpsObservation, |
| FailureType, |
| ) |
| from chaosops.env.world_sim import Scenario |
| from chaosops.rewards.reward_fn import StepRewardBreakdown |
|
|
|
|
| |
| |
| |
|
|
|
|
@dataclass
class EpisodeStep:
    """One agent turn of an episode, as recorded by ``run_episode``.

    Per the module docstring, each instance doubles as a renderable frame for
    the dashboard demo and as an (observation, action, reward) record.
    """

    # Zero-based index of the turn within the episode loop.
    turn: int
    # Role that acted on this turn.
    role: AgentRole
    # Observation the acting role was given *before* it chose its action.
    observation: ChaosOpsObservation
    # Action that was submitted to the environment for this turn.
    action: ChaosOpsAction
    # Reward reported on the observation returned by the step.
    reward: float
    # Reward breakdown the environment exposed after the step.
    breakdown: StepRewardBreakdown
    # Whether the observation returned by the step flagged the episode as done.
    done: bool
|
|
|
|
@dataclass
class EpisodeResult:
    """Summary of one episode: the recorded steps plus end-of-episode totals.

    Only ``scenario`` is required at construction time; every other field
    starts at a neutral default and is filled in by the runner.
    """

    # Scenario the episode was played against.
    scenario: Scenario
    # Recorded turns, in the order they were played.
    steps: list[EpisodeStep] = field(default_factory=list)
    # Whether the episode ended resolved.
    resolved: bool = False
    # Step counter at the end of the episode.
    final_step: int = 0
    # Total reward accumulated over the episode.
    cumulative_reward: float = 0.0
    # Number of wrong fixes counted during the episode.
    wrong_fixes: int = 0
    # Oversight flags collected during the episode.
    oversight_flags: list[str] = field(default_factory=list)
    # Root cause declared during the episode, if any.
    declared_root_cause: FailureType | None = None

    @property
    def mttr_steps(self) -> int:
        """Return ``final_step`` for a resolved episode, ``-1`` otherwise."""
        if not self.resolved:
            return -1
        return self.final_step
|
|
|
|
| |
| |
| |
|
|
|
|
def run_episode(
    env: ChaosOpsEnvironment,
    scenario: Scenario,
    policy_by_role: dict[AgentRole, Policy],
    *,
    max_turns: int | None = None,
) -> EpisodeResult:
    """Run one full episode with a per-role policy map.

    Parameters
    ----------
    env :
        A fresh or reusable :class:`ChaosOpsEnvironment`. The runner calls
        ``reset`` so prior state is discarded.
    scenario :
        The incident configuration to play.
    policy_by_role :
        Maps each role to the policy that should drive it. Missing roles
        fall back to ``NOOP``.
    max_turns :
        Hard upper bound on total agent turns. ``None`` (the default) means
        ``scenario.max_steps`` × number of roles so every role gets
        proportional airtime. An explicit ``0`` is honoured and plays no
        turns at all.

    Returns
    -------
    EpisodeResult
        The recorded steps plus the end-of-episode values read from
        ``env.state``.
    """
    observation = env.reset(scenario=scenario)
    result = EpisodeResult(scenario=scenario)

    # Compare against None rather than relying on truthiness: ``max_turns=0``
    # is a legitimate bound and must not silently expand to the default.
    if max_turns is None:
        turn_limit = scenario.max_steps * len(env.turn_order)
    else:
        turn_limit = max_turns

    for turn in range(turn_limit):
        # The environment decides whose turn it is; the runner only follows.
        role = observation.turn_role
        policy = policy_by_role.get(role)
        if policy is None:
            action = ChaosOpsAction(role=role, action_type=_noop_action_type())
        else:
            action = policy(observation, role)
            # Overwrite the role on the policy's action with the role whose
            # turn it actually is.
            action = action.model_copy(update={"role": role})

        next_obs = env.step(action)
        breakdown = env.last_breakdown
        assert breakdown is not None, "breakdown must be populated after step"

        result.steps.append(
            EpisodeStep(
                turn=turn,
                role=role,
                # Record the observation the agent acted on, not the one
                # produced by the step.
                observation=observation,
                action=action,
                # A missing (None) reward is recorded as 0.0.
                reward=next_obs.reward or 0.0,
                breakdown=breakdown,
                done=next_obs.done,
            )
        )

        observation = next_obs
        if next_obs.done:
            break

    # Copy the end-of-episode values off the environment state.
    state = env.state
    result.resolved = state.resolved
    result.final_step = state.step_count
    result.cumulative_reward = state.cumulative_reward
    result.wrong_fixes = state.wrong_fixes
    # Copy the list so later environment mutations do not alter the result.
    result.oversight_flags = list(state.oversight_flags)
    result.declared_root_cause = state.declared_root_cause
    return result
|
|
|
|
def run_batch(
    scenarios: list[Scenario],
    policy_by_role: dict[AgentRole, Policy],
) -> list[EpisodeResult]:
    """Evaluate a policy map across multiple scenarios — used for baselines.

    A single :class:`ChaosOpsEnvironment` is created and reused for every
    scenario; results are returned in the same order as ``scenarios``.
    """
    shared_env = ChaosOpsEnvironment()
    results: list[EpisodeResult] = []
    for scenario in scenarios:
        results.append(run_episode(shared_env, scenario, policy_by_role))
    return results
|
|
|
|
def _noop_action_type():
    """Return ``ActionType.NOOP``, importing ``ActionType`` at call time."""
    # NOTE(review): the import is deliberately function-local in the original;
    # the reason is not visible here (possibly import-order related) — confirm
    # before hoisting it to module level.
    from chaosops.env.models import ActionType

    noop = ActionType.NOOP
    return noop
|
|
|
|
# Public API of this module: the two record types and the two runners.
__all__ = ["EpisodeStep", "EpisodeResult", "run_episode", "run_batch"]
|
|