"""ChaosOps AI — Hugging Face Space entry point.

Gradio UI that lets a judge replay any incident scenario with any policy
(random / heuristic / oracle / trained) and watch the multi-agent response
unfold step-by-step.

The trained-policy lane activates when the environment variable
``CHAOSOPS_ADAPTER_PATH`` names a LoRA adapter — either a local directory or
an HF Hub repo id. Otherwise the Space still runs, falling back to the
heuristic policy so the UI works during cold-start or when no checkpoint has
been uploaded yet. The fallback is reported in the run summary
(``trained_adapter_status``) rather than hidden.

Deploy layout:
    hf_space/
        app.py            — this file (entry point HF Spaces picks up)
        requirements.txt  — pulls chaosops from GitHub + Gradio + torch stack
        README.md         — HF Space card (YAML frontmatter)
"""

from __future__ import annotations

import html
import logging
import os
import sys
from pathlib import Path

import gradio as gr

_LOG = logging.getLogger("chaosops.app")
if not _LOG.handlers:
    # Only attach a handler once, even if this module gets imported again.
    _h = logging.StreamHandler(sys.stderr)
    _h.setFormatter(
        logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s")
    )
    _LOG.addHandler(_h)
    _LOG.setLevel(logging.INFO)

from chaosops.agents.policies import (  # noqa: E402
    Policy,
    heuristic_policy,
    oracle_policy,
    random_policy,
)
from chaosops.agents.runner import EpisodeResult, run_episode  # noqa: E402
from chaosops.dashboard.transcript import (  # noqa: E402
    ROLE_TAG,
    render_transcript,
)
from chaosops.env.environment import ChaosOpsEnvironment  # noqa: E402
from chaosops.env.models import (  # noqa: E402
    AgentRole,
    DifficultyTier,
    FailureType,
)
from chaosops.env.world_sim import Scenario  # noqa: E402

ADAPTER_ENV = "CHAOSOPS_ADAPTER_PATH"

# Seed used by the UI by default and whenever the seed field arrives empty.
DEFAULT_SEED = 42

# Process-wide cache for the loaded trained policy (``None`` until a load
# succeeds).
_TRAINED_POLICY_CACHE = None

# Last failure reason — surfaced in the run-summary so judges aren't tricked
# by a silent heuristic fallback when the trained lane is broken.
_TRAINED_LOAD_ERROR: str | None = None


# ---------------------------------------------------------------------------
# Policy resolution
# ---------------------------------------------------------------------------


def _lazy_trained_policy():
    """Load the trained LoRA adapter once per process, lazily.

    ``CHAOSOPS_ADAPTER_PATH`` accepts either:

    * a local filesystem path (used in Colab / local dev), or
    * an HF Hub repo id like ``helloAK96/chaosops-grpo-lora`` (Spaces).

    For repo ids we materialise the adapter to local disk via
    ``snapshot_download`` on the first call — the second call hits the
    in-process cache and is free.

    Failures are logged and recorded in :data:`_TRAINED_LOAD_ERROR` so the
    Gradio summary can surface "trained adapter unavailable" instead of
    silently swapping in the heuristic policy. A failed load is *not*
    cached, so the next call tries again.

    Returns:
        The cached ``TrainedPolicy`` instance, or ``None`` when the adapter
        is not configured or could not be loaded.
    """
    global _TRAINED_POLICY_CACHE, _TRAINED_LOAD_ERROR

    if _TRAINED_POLICY_CACHE is not None:
        return _TRAINED_POLICY_CACHE

    adapter_ref = os.environ.get(ADAPTER_ENV)
    if not adapter_ref:
        _TRAINED_LOAD_ERROR = (
            f"{ADAPTER_ENV} env var is unset; trained lane disabled"
        )
        _LOG.warning(_TRAINED_LOAD_ERROR)
        return None

    local_path = Path(adapter_ref)
    if not local_path.exists():
        # Treat the value as an HF Hub repo id and snapshot_download it.
        try:
            from huggingface_hub import snapshot_download
        except ImportError as exc:
            _TRAINED_LOAD_ERROR = (
                f"huggingface_hub import failed ({exc}); cannot fetch adapter"
            )
            _LOG.error(_TRAINED_LOAD_ERROR)
            return None
        try:
            local_path = Path(
                snapshot_download(repo_id=adapter_ref, repo_type="model")
            )
        except Exception as exc:
            # Top-level boundary: any download failure must degrade to the
            # heuristic lane instead of crashing the UI callback.
            _TRAINED_LOAD_ERROR = (
                f"snapshot_download({adapter_ref!r}) failed: {exc!r}"
            )
            _LOG.exception(_TRAINED_LOAD_ERROR)
            return None

    try:
        # Imported lazily so the module is only needed when the trained
        # lane is actually requested.
        from chaosops.agents.trained_policy import TrainedPolicy

        _TRAINED_POLICY_CACHE = TrainedPolicy.from_adapter(local_path)
    except Exception as exc:
        _TRAINED_LOAD_ERROR = (
            f"TrainedPolicy.from_adapter({local_path}) failed: {exc!r}"
        )
        _LOG.exception(_TRAINED_LOAD_ERROR)
        return None

    _LOG.info("trained adapter loaded from %s", local_path)
    _TRAINED_LOAD_ERROR = None
    return _TRAINED_POLICY_CACHE


def _build_policy(name: str, scenario: Scenario) -> Policy:
    """Return the policy callable selected by *name* for *scenario*.

    Args:
        name: One of ``"random"``, ``"heuristic"``, ``"oracle"`` or
            ``"trained"``.
        scenario: Scenario whose seed / failure type parameterise the policy.

    Raises:
        ValueError: If *name* is not a known policy.
    """
    if name == "random":
        return random_policy(seed=scenario.seed)
    if name == "heuristic":
        return heuristic_policy(seed=scenario.seed)
    if name == "oracle":
        return oracle_policy(scenario.failure_type)
    if name == "trained":
        trained = _lazy_trained_policy()
        if trained is None:
            # Graceful fallback — Space is still useful before adapter lands.
            return heuristic_policy(seed=scenario.seed)
        return trained.as_policy()
    raise ValueError(f"unknown policy '{name}'")


# ---------------------------------------------------------------------------
# Rendering helpers
# ---------------------------------------------------------------------------

_ROLE_COLOR: dict[str, str] = {
    "SRE": "#2980b9",
    "DEV": "#16a085",
    "MGR": "#8e44ad",
    "OVS": "#c0392b",
}


def _render_chat_html(result: EpisodeResult) -> str:
    """Render the episode as a coloured chat log for the Gradio HTML widget.

    Each step becomes one row showing turn, role tag, action summary and
    reward, tinted with the role colour from :data:`_ROLE_COLOR`. A footer
    row summarises the episode outcome. Text that originates from the
    episode (action summary, oversight flags) is HTML-escaped.
    """
    # NOTE(review): the inline styles below were reconstructed during
    # review — confirm they match the intended look of the Space.
    blocks: list[str] = []
    for step in result.steps:
        tag = ROLE_TAG[step.role]
        color = _ROLE_COLOR.get(tag, "#333")
        args = step.action.args or {}
        args_str = " ".join(f"{k}={v}" for k, v in args.items())
        target = step.action.target or "-"
        summary = (
            f"{step.action.action_type.value} target={target}"
            + (f" {args_str}" if args_str else "")
        )
        blocks.append(
            f'<div style="border-left:4px solid {color};'
            f'padding:4px 8px;margin:4px 0;font-family:monospace;">'
            f'<b style="color:{color};">t{step.turn:02d} [{tag}]</b> '
            f"{html.escape(summary)} "
            f'<span style="color:#888;">reward={step.reward:+.1f}</span>'
            "</div>"
        )

    # Escaped for the same reason as the action summary: the flags are
    # episode output and are inserted into markup.
    flags_text = html.escape(str(result.oversight_flags or "[]"))
    footer = (
        '<hr style="margin:8px 0;">'
        '<div style="font-family:monospace;">'
        f"resolved: {result.resolved} · "
        f"steps: {result.final_step} · "
        f"cum_reward: {result.cumulative_reward:+.1f} · "
        f"wrong_fixes: {result.wrong_fixes} · "
        f"oversight_flags: {flags_text}"
        "</div>"
    )
    return '<div style="padding:4px;">' + "".join(blocks) + footer + "</div>"


# ---------------------------------------------------------------------------
# Episode runner (called from the Gradio button)
# ---------------------------------------------------------------------------


def run_scenario(failure: str, difficulty: str, policy_name: str, seed: int):
    """Run one episode and return the three values the UI displays.

    Args:
        failure: ``FailureType`` value selected in the UI.
        difficulty: ``DifficultyTier`` value selected in the UI.
        policy_name: Policy key understood by :func:`_build_policy`.
        seed: Scenario seed. ``None`` is treated as :data:`DEFAULT_SEED`,
            on the assumption that Gradio passes ``None`` when the number
            field is cleared.

    Returns:
        ``(chat_html, summary, transcript)`` — the coloured HTML chat log,
        a JSON-serialisable summary dict, and the output of
        ``render_transcript``.
    """
    # Normalise once so the scenario and the summary always agree.
    seed_value = DEFAULT_SEED if seed is None else int(seed)

    scenario = Scenario.from_type(
        FailureType(failure),
        seed=seed_value,
        difficulty=DifficultyTier(difficulty),
    )
    policy = _build_policy(policy_name, scenario)
    env = ChaosOpsEnvironment()
    # The same policy drives every role in the fleet.
    result = run_episode(env, scenario, {r: policy for r in AgentRole})

    chat_html = _render_chat_html(result)
    transcript = render_transcript(result)
    summary = {
        "failure_type": failure,
        "difficulty": difficulty,
        "policy": policy_name,
        "seed": seed_value,
        "resolved": result.resolved,
        "steps_to_resolve": result.final_step if result.resolved else None,
        "cumulative_reward": round(result.cumulative_reward, 2),
        "wrong_fixes": result.wrong_fixes,
        "oversight_flags": result.oversight_flags,
    }
    if policy_name == "trained":
        # Make the heuristic fallback visible instead of silent.
        if _TRAINED_POLICY_CACHE is None:
            summary["trained_adapter_status"] = (
                f"UNAVAILABLE (fell back to heuristic): "
                f"{_TRAINED_LOAD_ERROR or 'unknown'}"
            )
        else:
            summary["trained_adapter_status"] = "loaded"
    return chat_html, summary, transcript


# ---------------------------------------------------------------------------
# UI
# ---------------------------------------------------------------------------

INTRO_MARKDOWN = """
# ChaosOps AI — Multi-Agent Incident-Response Gym

A reinforcement-learning environment where a **four-agent fleet**
(SRE · Dev · Manager · **Oversight**) resolves a randomly injected
infrastructure incident. The fourth agent is a **scalable-oversight model**
whose job is to detect when *another AI in the fleet* (autoscaler,
load_balancer, deploy_bot) caused the incident — before the remediation team
touches the services.

**Policies**

- `random` · hard lower bound
- `heuristic` · what a decent human SRE would try
- `oracle` · cheats (knows ground truth) — upper-bound curve
- `trained` · our GRPO-tuned Qwen 2.5 1.5B LoRA checkpoint

Pick a failure type, smash **Run episode**, watch the team coordinate (or fail).
"""


def build_demo() -> gr.Blocks:
    """Assemble the Gradio Blocks UI and wire the run button."""
    failure_choices = [f.value for f in FailureType]
    tier_choices = [t.value for t in DifficultyTier]
    policy_choices = ["random", "heuristic", "oracle", "trained"]

    with gr.Blocks(title="ChaosOps AI") as demo:
        gr.Markdown(INTRO_MARKDOWN)
        with gr.Row():
            # Left column: scenario controls.
            with gr.Column(scale=1):
                failure = gr.Dropdown(
                    failure_choices,
                    value="rogue_deploy_bot",
                    label="Failure type",
                )
                difficulty = gr.Dropdown(
                    tier_choices,
                    value="hard",
                    label="Difficulty",
                )
                policy = gr.Dropdown(
                    policy_choices,
                    value="oracle",
                    label="Policy",
                )
                seed = gr.Number(
                    value=DEFAULT_SEED, precision=0, label="Seed"
                )
                run_btn = gr.Button("▶ Run episode", variant="primary")
                gr.Markdown(
                    "_Trained policy requires `CHAOSOPS_ADAPTER_PATH` to be "
                    "set on the Space. It falls back to the heuristic "
                    "otherwise._"
                )
            # Right column: episode output.
            with gr.Column(scale=2):
                chat_out = gr.HTML(label="Episode chat")
                summary_out = gr.JSON(label="Summary")
                transcript_out = gr.Textbox(
                    label="Full transcript (reward breakdown)",
                    lines=18,
                )
        run_btn.click(
            run_scenario,
            inputs=[failure, difficulty, policy, seed],
            outputs=[chat_out, summary_out, transcript_out],
        )
    return demo


if __name__ == "__main__":
    # Docker Spaces route external traffic to port 7860; bind on 0.0.0.0 so
    # the container's network namespace exposes the server beyond localhost.
    build_demo().launch(server_name="0.0.0.0", server_port=7860)