"""ChaosOps AI — Hugging Face Space entry point.

Gradio UI that lets a judge replay any incident scenario with any policy
(random / heuristic / oracle / trained) and watch the multi-agent response
unfold step-by-step. The trained-policy lane activates when the environment
variable ``CHAOSOPS_ADAPTER_PATH`` points at a LoRA adapter directory or an
HF Hub repo id; otherwise the Space still runs, falling back to the
heuristic (and flagging the fallback in the run summary) so the UI works
during cold start or before a checkpoint has been uploaded.

Deploy layout:
    hf_space/
        app.py            — this file (entry point HF Spaces picks up)
        requirements.txt  — pulls chaosops from GitHub + Gradio + torch stack
        README.md         — HF Space card (YAML frontmatter)
"""

from __future__ import annotations

import html
import logging
import os
import sys
from pathlib import Path

import gradio as gr

_LOG = logging.getLogger("chaosops.app")
if not _LOG.handlers:
    _h = logging.StreamHandler(sys.stderr)
    _h.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s"))
    _LOG.addHandler(_h)
_LOG.setLevel(logging.INFO)

from chaosops.agents.policies import (
    Policy,
    heuristic_policy,
    oracle_policy,
    random_policy,
)
from chaosops.agents.runner import EpisodeResult, run_episode
from chaosops.dashboard.transcript import ROLE_TAG, render_transcript
from chaosops.env.environment import ChaosOpsEnvironment
from chaosops.env.models import AgentRole, DifficultyTier, FailureType
from chaosops.env.world_sim import Scenario


ADAPTER_ENV = "CHAOSOPS_ADAPTER_PATH"
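# The env var accepts either form; the local path below is illustrative only:
#   CHAOSOPS_ADAPTER_PATH=helloAK96/chaosops-grpo-lora   (HF Hub repo id)
#   CHAOSOPS_ADAPTER_PATH=/data/chaosops/lora_adapter    (local adapter directory)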
_TRAINED_POLICY_CACHE = None
# Last failure reason — surfaced in the run-summary so judges aren't tricked
# by a silent heuristic fallback when the trained lane is broken.
_TRAINED_LOAD_ERROR: str | None = None


# ---------------------------------------------------------------------------
# Policy resolution
# ---------------------------------------------------------------------------


def _lazy_trained_policy():
    """Load the trained LoRA adapter once per process, lazily.

    ``CHAOSOPS_ADAPTER_PATH`` accepts either:
      * a local filesystem path (used in Colab / local dev), or
      * an HF Hub repo id like ``helloAK96/chaosops-grpo-lora`` (Spaces).

    For repo ids we materialise the adapter to local disk via
    ``snapshot_download`` on the first call; subsequent calls hit the
    in-process cache and are free.

    Failures are logged at ERROR level and recorded in
    :data:`_TRAINED_LOAD_ERROR` so the Gradio summary can surface
    "trained adapter unavailable" instead of silently swapping in the
    heuristic policy.
    """
    global _TRAINED_POLICY_CACHE, _TRAINED_LOAD_ERROR
    if _TRAINED_POLICY_CACHE is not None:
        return _TRAINED_POLICY_CACHE
    adapter_ref = os.environ.get(ADAPTER_ENV)
    if not adapter_ref:
        _TRAINED_LOAD_ERROR = (
            f"{ADAPTER_ENV} env var is unset; trained lane disabled"
        )
        _LOG.warning(_TRAINED_LOAD_ERROR)
        return None

    local_path = Path(adapter_ref)
    if not local_path.exists():
        # Treat the value as an HF Hub repo id and snapshot_download it.
        try:
            from huggingface_hub import snapshot_download
        except ImportError as exc:
            _TRAINED_LOAD_ERROR = (
                f"huggingface_hub import failed ({exc}); cannot fetch adapter"
            )
            _LOG.error(_TRAINED_LOAD_ERROR)
            return None
        try:
            local_path = Path(
                snapshot_download(repo_id=adapter_ref, repo_type="model")
            )
        except Exception as exc:
            _TRAINED_LOAD_ERROR = (
                f"snapshot_download({adapter_ref!r}) failed: {exc!r}"
            )
            _LOG.exception(_TRAINED_LOAD_ERROR)
            return None

    try:
        from chaosops.agents.trained_policy import TrainedPolicy

        _TRAINED_POLICY_CACHE = TrainedPolicy.from_adapter(local_path)
    except Exception as exc:
        _TRAINED_LOAD_ERROR = (
            f"TrainedPolicy.from_adapter({local_path}) failed: {exc!r}"
        )
        _LOG.exception(_TRAINED_LOAD_ERROR)
        return None

    _LOG.info("trained adapter loaded from %s", local_path)
    _TRAINED_LOAD_ERROR = None
    return _TRAINED_POLICY_CACHE


def _build_policy(name: str, scenario: Scenario) -> Policy:
    if name == "random":
        return random_policy(seed=scenario.seed)
    if name == "heuristic":
        return heuristic_policy(seed=scenario.seed)
    if name == "oracle":
        return oracle_policy(scenario.failure_type)
    if name == "trained":
        trained = _lazy_trained_policy()
        if trained is None:
            # Graceful fallback — Space is still useful before adapter lands.
            return heuristic_policy(seed=scenario.seed)
        return trained.as_policy()
    raise ValueError(f"unknown policy '{name}'")


# ---------------------------------------------------------------------------
# Rendering helpers
# ---------------------------------------------------------------------------


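# Chat-log colours keyed by ROLE_TAG abbreviations for the four agents
# (SRE, Dev, Manager, Oversight); unknown tags fall back to dark grey below.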
_ROLE_COLOR: dict[str, str] = {
    "SRE": "#2980b9",
    "DEV": "#16a085",
    "MGR": "#8e44ad",
    "OVS": "#c0392b",
}


def _render_chat_html(result: EpisodeResult) -> str:
    """Render the episode as a coloured chat log for the Gradio HTML widget."""
    blocks: list[str] = []
    for step in result.steps:
        tag = ROLE_TAG[step.role]
        color = _ROLE_COLOR.get(tag, "#333")
        args = step.action.args or {}
        args_str = " ".join(f"{k}={v}" for k, v in args.items())
        target = step.action.target or "-"
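        # Each row renders as: "tNN [TAG] <action_type> target=<target> [k=v ...] reward=±X.X"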
        summary = (
            f"{step.action.action_type.value} target={target}"
            + (f" {args_str}" if args_str else "")
        )
        blocks.append(
            f'<div style="margin-bottom:6px;">'
            f'<span style="color:{color};font-weight:600;">t{step.turn:02d} [{tag}]</span> '
            f'<span style="font-family:monospace;">{html.escape(summary)}</span> '
            f'<span style="color:#888;">reward={step.reward:+.1f}</span>'
            f"</div>"
        )
    footer = (
        f'<hr style="margin:10px 0;">'
        f'<div><b>resolved:</b> {result.resolved} · '
        f'<b>steps:</b> {result.final_step} · '
        f'<b>cum_reward:</b> {result.cumulative_reward:+.1f} · '
        f'<b>wrong_fixes:</b> {result.wrong_fixes} · '
        f'<b>oversight_flags:</b> {result.oversight_flags or "[]"}</div>'
    )
    return '<div style="font-size:13px;line-height:1.5;">' + "".join(blocks) + footer + "</div>"


# ---------------------------------------------------------------------------
# Episode runner (called from the Gradio button)
# ---------------------------------------------------------------------------


def run_scenario(failure: str, difficulty: str, policy_name: str, seed: int):
    scenario = Scenario.from_type(
        FailureType(failure),
        seed=int(seed),
        difficulty=DifficultyTier(difficulty),
    )
    policy = _build_policy(policy_name, scenario)
    env = ChaosOpsEnvironment()
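    # All four AgentRole members are driven by the same policy instance for this episode.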
    result = run_episode(env, scenario, {r: policy for r in AgentRole})

    chat_html = _render_chat_html(result)
    transcript = render_transcript(result)

    summary = {
        "failure_type": failure,
        "difficulty": difficulty,
        "policy": policy_name,
        "seed": int(seed),
        "resolved": result.resolved,
        "steps_to_resolve": result.final_step if result.resolved else None,
        "cumulative_reward": round(result.cumulative_reward, 2),
        "wrong_fixes": result.wrong_fixes,
        "oversight_flags": result.oversight_flags,
    }
    if policy_name == "trained":
        if _TRAINED_POLICY_CACHE is None:
            summary["trained_adapter_status"] = (
                f"UNAVAILABLE (fell back to heuristic): "
                f"{_TRAINED_LOAD_ERROR or 'unknown'}"
            )
        else:
            summary["trained_adapter_status"] = "loaded"
    return chat_html, summary, transcript


# ---------------------------------------------------------------------------
# UI
# ---------------------------------------------------------------------------


INTRO_MARKDOWN = """
# ChaosOps AI — Multi-Agent Incident-Response Gym

A reinforcement-learning environment where a **four-agent fleet**
(SRE · Dev · Manager · **Oversight**) resolves a randomly injected
infrastructure incident. The fourth agent is a **scalable-oversight model**
whose job is to detect when *another AI in the fleet* (autoscaler,
load_balancer, deploy_bot) caused the incident — before the remediation
team touches the services.

**Policies**
- `random` · hard lower bound
- `heuristic` · what a decent human SRE would try
- `oracle` · cheats (knows ground truth) — upper-bound curve
- `trained` · our GRPO-tuned Qwen 2.5 1.5B LoRA checkpoint

Pick a failure type, smash **Run episode**, watch the team coordinate (or fail).
"""


def build_demo() -> gr.Blocks:
    failure_choices = [f.value for f in FailureType]
    tier_choices = [t.value for t in DifficultyTier]
    policy_choices = ["random", "heuristic", "oracle", "trained"]
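    # Dropdown values must match the policy names handled by _build_policy above.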

    with gr.Blocks(title="ChaosOps AI") as demo:
        gr.Markdown(INTRO_MARKDOWN)

        with gr.Row():
            with gr.Column(scale=1):
                failure = gr.Dropdown(
                    failure_choices,
                    value="rogue_deploy_bot",
                    label="Failure type",
                )
                difficulty = gr.Dropdown(
                    tier_choices,
                    value="hard",
                    label="Difficulty",
                )
                policy = gr.Dropdown(
                    policy_choices,
                    value="oracle",
                    label="Policy",
                )
                seed = gr.Number(value=42, precision=0, label="Seed")
                run_btn = gr.Button("▶ Run episode", variant="primary")
                gr.Markdown(
                    "_Trained policy requires `CHAOSOPS_ADAPTER_PATH` to be "
                    "set on the Space. It falls back to the heuristic otherwise._"
                )
            with gr.Column(scale=2):
                chat_out = gr.HTML(label="Episode chat")
                summary_out = gr.JSON(label="Summary")
        transcript_out = gr.Textbox(
            label="Full transcript (reward breakdown)",
            lines=18,
        )

        run_btn.click(
            run_scenario,
            inputs=[failure, difficulty, policy, seed],
            outputs=[chat_out, summary_out, transcript_out],
        )

    return demo


if __name__ == "__main__":
    # Docker Spaces route external traffic to port 7860; bind on 0.0.0.0 so
    # the server is reachable from outside the container, not just localhost.
    build_demo().launch(server_name="0.0.0.0", server_port=7860)