"""ChaosOps AI — Hugging Face Space entry point. Gradio UI that lets a judge replay any incident scenario with any policy (random / heuristic / oracle / trained) and watch the multi-agent response unfold step-by-step. The trained-policy lane activates when the environment variable ``CHAOSOPS_ADAPTER_PATH`` points at a LoRA adapter directory — otherwise the Space still runs, silently falling back to the heuristic so the UI works during cold-start or when no checkpoint has been uploaded yet. Deploy layout: hf_space/ app.py — this file (entry point HF Spaces picks up) requirements.txt — pulls chaosops from GitHub + Gradio + torch stack README.md — HF Space card (YAML frontmatter) """ from __future__ import annotations import html import logging import os import sys from pathlib import Path import gradio as gr _LOG = logging.getLogger("chaosops.app") if not _LOG.handlers: _h = logging.StreamHandler(sys.stderr) _h.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s")) _LOG.addHandler(_h) _LOG.setLevel(logging.INFO) from chaosops.agents.policies import ( Policy, heuristic_policy, oracle_policy, random_policy, ) from chaosops.agents.runner import EpisodeResult, run_episode from chaosops.dashboard.transcript import ROLE_TAG, render_transcript from chaosops.env.environment import ChaosOpsEnvironment from chaosops.env.models import AgentRole, DifficultyTier, FailureType from chaosops.env.world_sim import Scenario ADAPTER_ENV = "CHAOSOPS_ADAPTER_PATH" _TRAINED_POLICY_CACHE = None # Last failure reason — surfaced in the run-summary so judges aren't tricked # by a silent heuristic fallback when the trained lane is broken. _TRAINED_LOAD_ERROR: str | None = None # --------------------------------------------------------------------------- # Policy resolution # --------------------------------------------------------------------------- def _lazy_trained_policy(): """Load the trained LoRA adapter once per process, lazily. ``CHAOSOPS_ADAPTER_PATH`` accepts either: * a local filesystem path (used in Colab / local dev), or * an HF Hub repo id like ``helloAK96/chaosops-grpo-lora`` (Spaces). For repo ids we materialise the adapter to local disk via ``snapshot_download`` on the first call — the second call hits the in-process cache and is free. Failures are logged at ERROR level and recorded in :data:`_TRAINED_LOAD_ERROR` so the Gradio summary can surface "trained adapter unavailable" instead of silently swapping in the heuristic policy. """ global _TRAINED_POLICY_CACHE, _TRAINED_LOAD_ERROR if _TRAINED_POLICY_CACHE is not None: return _TRAINED_POLICY_CACHE adapter_ref = os.environ.get(ADAPTER_ENV) if not adapter_ref: _TRAINED_LOAD_ERROR = ( f"{ADAPTER_ENV} env var is unset; trained lane disabled" ) _LOG.warning(_TRAINED_LOAD_ERROR) return None local_path = Path(adapter_ref) if not local_path.exists(): # Treat the value as an HF Hub repo id and snapshot_download it. try: from huggingface_hub import snapshot_download except ImportError as exc: _TRAINED_LOAD_ERROR = ( f"huggingface_hub import failed ({exc}); cannot fetch adapter" ) _LOG.error(_TRAINED_LOAD_ERROR) return None try: local_path = Path( snapshot_download(repo_id=adapter_ref, repo_type="model") ) except Exception as exc: _TRAINED_LOAD_ERROR = ( f"snapshot_download({adapter_ref!r}) failed: {exc!r}" ) _LOG.exception(_TRAINED_LOAD_ERROR) return None try: from chaosops.agents.trained_policy import TrainedPolicy _TRAINED_POLICY_CACHE = TrainedPolicy.from_adapter(local_path) except Exception as exc: _TRAINED_LOAD_ERROR = ( f"TrainedPolicy.from_adapter({local_path}) failed: {exc!r}" ) _LOG.exception(_TRAINED_LOAD_ERROR) return None _LOG.info("trained adapter loaded from %s", local_path) _TRAINED_LOAD_ERROR = None return _TRAINED_POLICY_CACHE def _build_policy(name: str, scenario: Scenario) -> Policy: if name == "random": return random_policy(seed=scenario.seed) if name == "heuristic": return heuristic_policy(seed=scenario.seed) if name == "oracle": return oracle_policy(scenario.failure_type) if name == "trained": trained = _lazy_trained_policy() if trained is None: # Graceful fallback — Space is still useful before adapter lands. return heuristic_policy(seed=scenario.seed) return trained.as_policy() raise ValueError(f"unknown policy '{name}'") # --------------------------------------------------------------------------- # Rendering helpers # --------------------------------------------------------------------------- _ROLE_COLOR: dict[str, str] = { "SRE": "#2980b9", "DEV": "#16a085", "MGR": "#8e44ad", "OVS": "#c0392b", } def _render_chat_html(result: EpisodeResult) -> str: """Render the episode as a coloured chat log for the Gradio HTML widget.""" blocks: list[str] = [] for step in result.steps: tag = ROLE_TAG[step.role] color = _ROLE_COLOR.get(tag, "#333") args = step.action.args or {} args_str = " ".join(f"{k}={v}" for k, v in args.items()) target = step.action.target or "-" summary = ( f"{step.action.action_type.value} target={target}" + (f" {args_str}" if args_str else "") ) blocks.append( f'