# chaosops/agents/runner.py
"""Episode runner for multi-agent rollouts.
Used for:
* Generating baseline reward curves before LLM training.
* Producing trajectories TRL can consume as (observation, action, reward).
* Driving the dashboard demo — each ``EpisodeStep`` is a renderable frame.
The runner is policy-agnostic: pass any ``Policy`` (scripted or an LLM-wrapped
adapter) and it drives the round-robin turn order until the episode ends.
"""

from __future__ import annotations

from dataclasses import dataclass, field

from chaosops.agents.policies import Policy
from chaosops.env.environment import ChaosOpsEnvironment
from chaosops.env.models import (
    ActionType,
    AgentRole,
    ChaosOpsAction,
    ChaosOpsObservation,
    FailureType,
)
from chaosops.env.world_sim import Scenario
from chaosops.rewards.reward_fn import StepRewardBreakdown

# ---------------------------------------------------------------------------
# Trajectory types
# ---------------------------------------------------------------------------


@dataclass
class EpisodeStep:
    """One agent turn: the observation seen, the action taken, the reward earned."""

    turn: int
role: AgentRole
observation: ChaosOpsObservation
action: ChaosOpsAction
reward: float
breakdown: StepRewardBreakdown
    done: bool


@dataclass
class EpisodeResult:
    """Aggregate outcome of one episode plus the full step-by-step trajectory."""

    scenario: Scenario
steps: list[EpisodeStep] = field(default_factory=list)
resolved: bool = False
final_step: int = 0
cumulative_reward: float = 0.0
wrong_fixes: int = 0
oversight_flags: list[str] = field(default_factory=list)
    declared_root_cause: FailureType | None = None

    @property
    def mttr_steps(self) -> int:
        """Time-to-resolution in environment steps, or ``-1`` if never resolved."""
        return self.final_step if self.resolved else -1
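

# A minimal flattening sketch (illustrative, not an established API in this
# repo): the module docstring says TRL consumes trajectories as
# (observation, action, reward); this helper assumes a plain triple per step
# is the desired shape.
def trajectory_tuples(
    result: EpisodeResult,
) -> list[tuple[ChaosOpsObservation, ChaosOpsAction, float]]:
    """Illustrative: one (observation, action, reward) triple per step."""
    return [(step.observation, step.action, step.reward) for step in result.steps]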


# ---------------------------------------------------------------------------
# Runner
# ---------------------------------------------------------------------------


def run_episode(
env: ChaosOpsEnvironment,
scenario: Scenario,
policy_by_role: dict[AgentRole, Policy],
*,
max_turns: int | None = None,
) -> EpisodeResult:
"""Run one full episode with a per-role policy map.
Parameters
----------
env :
A fresh or reusable :class:`ChaosOpsEnvironment`. The runner calls
``reset`` so prior state is discarded.
scenario :
The incident configuration to play.
policy_by_role :
Maps each role to the policy that should drive it. Missing roles
fall back to ``NOOP``.
max_turns :
Hard upper bound on total agent turns. Defaults to ``scenario.max_steps``
× number of roles so every role gets proportional airtime.
"""
observation = env.reset(scenario=scenario)
result = EpisodeResult(scenario=scenario)
    # Test against None explicitly: ``or`` would wrongly discard max_turns=0.
    turn_limit = (
        max_turns
        if max_turns is not None
        else scenario.max_steps * len(env.turn_order)
    )

for turn in range(turn_limit):
        role = observation.turn_role
        policy = policy_by_role.get(role)
        if policy is None:
            # Roles without a policy fall back to a NOOP action.
            action = ChaosOpsAction(role=role, action_type=ActionType.NOOP)
        else:
            action = policy(observation, role)
            # Enforce the environment's turn order even if the policy
            # mislabeled the acting role.
            action = action.model_copy(update={"role": role})
        next_obs = env.step(action)
        breakdown = env.last_breakdown
        assert breakdown is not None, "breakdown must be populated after step"
result.steps.append(
EpisodeStep(
turn=turn,
role=role,
observation=observation,
action=action,
reward=next_obs.reward or 0.0,
breakdown=breakdown,
done=next_obs.done,
)
)
        observation = next_obs
        if observation.done:
            break

result.resolved = env.state.resolved
result.final_step = env.state.step_count
result.cumulative_reward = env.state.cumulative_reward
result.wrong_fixes = env.state.wrong_fixes
result.oversight_flags = list(env.state.oversight_flags)
result.declared_root_cause = env.state.declared_root_cause
return result
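

# Usage sketch (illustrative; ``make_scenario`` and ``scripted_policy`` are
# hypothetical stand-ins, not names defined in this repo — any ``Scenario``
# and ``Policy``-compatible callables will do):
#
#     env = ChaosOpsEnvironment()
#     policies = {role: scripted_policy for role in env.turn_order}
#     result = run_episode(env, make_scenario(), policies)
#     print(result.resolved, result.cumulative_reward, result.mttr_steps)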


def run_batch(
scenarios: list[Scenario],
policy_by_role: dict[AgentRole, Policy],
) -> list[EpisodeResult]:
"""Evaluate a policy map across multiple scenarios — used for baselines."""
env = ChaosOpsEnvironment()
return [run_episode(env, sc, policy_by_role) for sc in scenarios]
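

# A minimal batch-summary sketch (an assumption, not part of this module's
# public API): aggregate the baseline metrics an ``EpisodeResult`` exposes.
def summarize_batch(results: list[EpisodeResult]) -> dict[str, float]:
    """Illustrative: resolution rate and mean cumulative reward over a batch."""
    n = len(results) or 1  # guard against division by zero on an empty batch
    return {
        "resolution_rate": sum(r.resolved for r in results) / n,
        "mean_cumulative_reward": sum(r.cumulative_reward for r in results) / n,
    }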


__all__ = [
"EpisodeStep",
"EpisodeResult",
"run_episode",
"run_batch",
]