# chaosops/app.py
# Phase A submission cleanup — OpenEnv compliance + composable rubrics +
# loud-fail trained lane (adfe21e)
"""ChaosOps AI — Hugging Face Space entry point.
Gradio UI that lets a judge replay any incident scenario with any policy
(random / heuristic / oracle / trained) and watch the multi-agent response
unfold step-by-step. The trained-policy lane activates when the environment
variable ``CHAOSOPS_ADAPTER_PATH`` points at a LoRA adapter directory —
otherwise the Space still runs, falling back to the heuristic (and reporting
the fallback in the run summary) so the UI works during cold-start or when no
checkpoint has been uploaded yet.
Deploy layout:
hf_space/
app.py — this file (entry point HF Spaces picks up)
requirements.txt — pulls chaosops from GitHub + Gradio + torch stack
README.md — HF Space card (YAML frontmatter)
"""
from __future__ import annotations
import html
import logging
import os
import sys
from pathlib import Path
import gradio as gr
# Module logger for the Space. The handler is attached only when none exist
# yet so repeated imports (e.g. dev-server reloads) don't stack duplicate
# handlers and double-print every record.
_LOG = logging.getLogger("chaosops.app")
if not _LOG.handlers:
    _h = logging.StreamHandler(sys.stderr)
    _h.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s"))
    _LOG.addHandler(_h)
    _LOG.setLevel(logging.INFO)
from chaosops.agents.policies import (
Policy,
heuristic_policy,
oracle_policy,
random_policy,
)
from chaosops.agents.runner import EpisodeResult, run_episode
from chaosops.dashboard.transcript import ROLE_TAG, render_transcript
from chaosops.env.environment import ChaosOpsEnvironment
from chaosops.env.models import AgentRole, DifficultyTier, FailureType
from chaosops.env.world_sim import Scenario
# Env var naming either a local adapter directory or an HF Hub repo id;
# read lazily by _lazy_trained_policy().
ADAPTER_ENV = "CHAOSOPS_ADAPTER_PATH"
# Process-wide singleton holding the loaded trained policy (None until the
# first successful load).
_TRAINED_POLICY_CACHE = None
# Last failure reason — surfaced in the run-summary so judges aren't tricked
# by a silent heuristic fallback when the trained lane is broken.
_TRAINED_LOAD_ERROR: str | None = None
# ---------------------------------------------------------------------------
# Policy resolution
# ---------------------------------------------------------------------------
def _lazy_trained_policy():
    """Load the trained LoRA adapter once per process, lazily.

    ``CHAOSOPS_ADAPTER_PATH`` accepts either:

    * a local filesystem path (used in Colab / local dev), or
    * an HF Hub repo id like ``helloAK96/chaosops-grpo-lora`` (Spaces).

    For repo ids we materialise the adapter to local disk via
    ``snapshot_download`` on the first call — the second call hits the
    in-process cache and is free.

    Failures are logged at ERROR level and recorded in
    :data:`_TRAINED_LOAD_ERROR` so the Gradio summary can surface
    "trained adapter unavailable" instead of silently swapping in the
    heuristic policy.

    Returns the cached ``TrainedPolicy`` instance, or ``None`` when the
    adapter is unavailable (env var unset, download failed, load failed).
    Note that failed loads are NOT cached — each call retries from scratch.
    """
    global _TRAINED_POLICY_CACHE, _TRAINED_LOAD_ERROR
    # Fast path: adapter already loaded earlier in this process.
    if _TRAINED_POLICY_CACHE is not None:
        return _TRAINED_POLICY_CACHE
    adapter_ref = os.environ.get(ADAPTER_ENV)
    if not adapter_ref:
        _TRAINED_LOAD_ERROR = (
            f"{ADAPTER_ENV} env var is unset; trained lane disabled"
        )
        _LOG.warning(_TRAINED_LOAD_ERROR)
        return None
    local_path = Path(adapter_ref)
    if not local_path.exists():
        # Treat the value as an HF Hub repo id and snapshot_download it.
        try:
            # Imported lazily so the Space boots even without the package.
            from huggingface_hub import snapshot_download
        except ImportError as exc:
            _TRAINED_LOAD_ERROR = (
                f"huggingface_hub import failed ({exc}); cannot fetch adapter"
            )
            _LOG.error(_TRAINED_LOAD_ERROR)
            return None
        try:
            # snapshot_download returns the local cache directory path.
            local_path = Path(
                snapshot_download(repo_id=adapter_ref, repo_type="model")
            )
        except Exception as exc:
            _TRAINED_LOAD_ERROR = (
                f"snapshot_download({adapter_ref!r}) failed: {exc!r}"
            )
            _LOG.exception(_TRAINED_LOAD_ERROR)
            return None
    try:
        # Deferred import: pulls in the torch/transformers stack, which is
        # heavy and only needed for the trained lane.
        from chaosops.agents.trained_policy import TrainedPolicy
        _TRAINED_POLICY_CACHE = TrainedPolicy.from_adapter(local_path)
    except Exception as exc:
        _TRAINED_LOAD_ERROR = (
            f"TrainedPolicy.from_adapter({local_path}) failed: {exc!r}"
        )
        _LOG.exception(_TRAINED_LOAD_ERROR)
        return None
    _LOG.info("trained adapter loaded from %s", local_path)
    # Clear any stale error from earlier failed attempts in this process.
    _TRAINED_LOAD_ERROR = None
    return _TRAINED_POLICY_CACHE
def _build_policy(name: str, scenario: Scenario) -> Policy:
    """Turn a policy name from the UI dropdown into a callable policy.

    The trained lane degrades to the heuristic whenever the adapter is
    unavailable, so the Space stays usable before a checkpoint lands.

    Raises:
        ValueError: if *name* is not one of the four known policies.
    """
    if name == "trained":
        trained = _lazy_trained_policy()
        if trained is not None:
            return trained.as_policy()
        # Graceful fallback — Space is still useful before adapter lands.
        return heuristic_policy(seed=scenario.seed)
    # Remaining policies are cheap to build; dispatch via a small table.
    builders = {
        "random": lambda: random_policy(seed=scenario.seed),
        "heuristic": lambda: heuristic_policy(seed=scenario.seed),
        "oracle": lambda: oracle_policy(scenario.failure_type),
    }
    make = builders.get(name)
    if make is None:
        raise ValueError(f"unknown policy '{name}'")
    return make()
# ---------------------------------------------------------------------------
# Rendering helpers
# ---------------------------------------------------------------------------
# Display colour per role tag in the chat-log HTML; unknown tags fall back
# to "#333" at the lookup site in _render_chat_html.
_ROLE_COLOR: dict[str, str] = {
    "SRE": "#2980b9",
    "DEV": "#16a085",
    "MGR": "#8e44ad",
    "OVS": "#c0392b",
}
def _render_chat_html(result: EpisodeResult) -> str:
"""Render the episode as a coloured chat log for the Gradio HTML widget."""
blocks: list[str] = []
for step in result.steps:
tag = ROLE_TAG[step.role]
color = _ROLE_COLOR.get(tag, "#333")
args = step.action.args or {}
args_str = " ".join(f"{k}={v}" for k, v in args.items())
target = step.action.target or "-"
summary = (
f"{step.action.action_type.value} target={target}"
+ (f" {args_str}" if args_str else "")
)
blocks.append(
f'<div style="margin-bottom:6px;">'
f'<span style="color:{color};font-weight:600;">t{step.turn:02d} [{tag}]</span> '
f'<span style="font-family:monospace;">{html.escape(summary)}</span> '
f'<span style="color:#888;">reward={step.reward:+.1f}</span>'
f"</div>"
)
footer = (
f'<hr style="margin:10px 0;">'
f'<div><b>resolved:</b> {result.resolved} · '
f'<b>steps:</b> {result.final_step} · '
f'<b>cum_reward:</b> {result.cumulative_reward:+.1f} · '
f'<b>wrong_fixes:</b> {result.wrong_fixes} · '
f'<b>oversight_flags:</b> {result.oversight_flags or "[]"}</div>'
)
return '<div style="font-size:13px;line-height:1.5;">' + "".join(blocks) + footer + "</div>"
# ---------------------------------------------------------------------------
# Episode runner (called from the Gradio button)
# ---------------------------------------------------------------------------
def run_scenario(failure: str, difficulty: str, policy_name: str, seed: int):
    """Run one episode for the chosen scenario/policy and render the outputs.

    Returns a ``(chat_html, summary_dict, transcript_text)`` triple wired
    to the three Gradio output widgets.
    """
    seed_int = int(seed)
    scenario = Scenario.from_type(
        FailureType(failure),
        seed=seed_int,
        difficulty=DifficultyTier(difficulty),
    )
    # One shared policy object drives every role in the fleet.
    policy = _build_policy(policy_name, scenario)
    env = ChaosOpsEnvironment()
    fleet = {role: policy for role in AgentRole}
    result = run_episode(env, scenario, fleet)
    summary = {
        "failure_type": failure,
        "difficulty": difficulty,
        "policy": policy_name,
        "seed": seed_int,
        "resolved": result.resolved,
        "steps_to_resolve": result.final_step if result.resolved else None,
        "cumulative_reward": round(result.cumulative_reward, 2),
        "wrong_fixes": result.wrong_fixes,
        "oversight_flags": result.oversight_flags,
    }
    if policy_name == "trained":
        # Loud-fail: tell the judge whether the real adapter ran or the
        # heuristic silently stood in for it.
        if _TRAINED_POLICY_CACHE is not None:
            summary["trained_adapter_status"] = "loaded"
        else:
            summary["trained_adapter_status"] = (
                f"UNAVAILABLE (fell back to heuristic): "
                f"{_TRAINED_LOAD_ERROR or 'unknown'}"
            )
    return _render_chat_html(result), summary, render_transcript(result)
# ---------------------------------------------------------------------------
# UI
# ---------------------------------------------------------------------------
# Landing copy rendered as the first Markdown widget of the Space UI.
INTRO_MARKDOWN = """
# ChaosOps AI — Multi-Agent Incident-Response Gym
A reinforcement-learning environment where a **four-agent fleet**
(SRE · Dev · Manager · **Oversight**) resolves a randomly injected
infrastructure incident. The fourth agent is a **scalable-oversight model**
whose job is to detect when *another AI in the fleet* (autoscaler,
load_balancer, deploy_bot) caused the incident — before the remediation
team touches the services.
**Policies**
- `random` · hard lower bound
- `heuristic` · what a decent human SRE would try
- `oracle` · cheats (knows ground truth) — upper-bound curve
- `trained` · our GRPO-tuned Qwen 2.5 1.5B LoRA checkpoint
Pick a failure type, smash **Run episode**, watch the team coordinate (or fail).
"""
def build_demo() -> gr.Blocks:
    """Assemble the Gradio Blocks UI: controls on the left, outputs right.

    Returns the Blocks object; the caller decides when to ``launch`` it.
    """
    failure_options = [f.value for f in FailureType]
    difficulty_options = [t.value for t in DifficultyTier]
    policy_options = ["random", "heuristic", "oracle", "trained"]
    with gr.Blocks(title="ChaosOps AI") as demo:
        gr.Markdown(INTRO_MARKDOWN)
        with gr.Row():
            # Left column: scenario + policy controls.
            with gr.Column(scale=1):
                failure_dd = gr.Dropdown(
                    failure_options,
                    value="rogue_deploy_bot",
                    label="Failure type",
                )
                difficulty_dd = gr.Dropdown(
                    difficulty_options,
                    value="hard",
                    label="Difficulty",
                )
                policy_dd = gr.Dropdown(
                    policy_options,
                    value="oracle",
                    label="Policy",
                )
                seed_box = gr.Number(value=42, precision=0, label="Seed")
                run_btn = gr.Button("▶ Run episode", variant="primary")
                gr.Markdown(
                    "_Trained policy requires `CHAOSOPS_ADAPTER_PATH` to be "
                    "set on the Space. It falls back to the heuristic otherwise._"
                )
            # Right column: rendered episode outputs.
            with gr.Column(scale=2):
                chat_out = gr.HTML(label="Episode chat")
                summary_out = gr.JSON(label="Summary")
                transcript_out = gr.Textbox(
                    label="Full transcript (reward breakdown)",
                    lines=18,
                )
        run_btn.click(
            run_scenario,
            inputs=[failure_dd, difficulty_dd, policy_dd, seed_box],
            outputs=[chat_out, summary_out, transcript_out],
        )
    return demo
if __name__ == "__main__":
# Docker Spaces route external traffic to port 7860; bind on 0.0.0.0 so
# the container's network namespace exposes the server beyond localhost.
build_demo().launch(server_name="0.0.0.0", server_port=7860)