Spaces:
Build error
Build error
Upload folder using huggingface_hub
Browse files- Dockerfile +31 -0
- models.py +41 -0
- serve.py +143 -0
- server/__init__.py +11 -0
- server/app.py +41 -0
- server/baselines.py +217 -0
- server/scenarios.py +0 -0
- server/stack_doctor_environment.py +279 -0
- server/stack_doctor_mcp.py +396 -0
- static/index.html +1566 -0
Dockerfile
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
FROM python:3.12-slim

WORKDIR /app

# Install torch on its own from the CPU-only index. In the original single
# `pip install`, `--index-url` applied to the WHOLE command (pip options are
# command-wide, not per-requirement), so pip looked for fastapi/mcp/openenv-core
# on download.pytorch.org — which does not host them — and the build failed.
RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu

# Remaining dependencies come from the default PyPI index.
RUN pip install --no-cache-dir \
    openenv-core \
    fastapi \
    uvicorn \
    mcp \
    transformers \
    accelerate \
    sentencepiece

# Keep the HF cache inside /app so weights downloaded at build time stay
# readable at runtime (HF Spaces containers do not run as root).
ENV HF_HOME=/app/.cache

# Pre-download model weights at build time (faster cold start)
RUN python -c "from transformers import AutoModelForCausalLM, AutoTokenizer; \
    AutoTokenizer.from_pretrained('Qwen/Qwen2.5-1.5B-Instruct'); \
    AutoModelForCausalLM.from_pretrained('Qwen/Qwen2.5-1.5B-Instruct')" \
    && chmod -R a+rX /app/.cache

# Copy app code
COPY server/ server/
COPY models.py .
COPY hf_space/serve.py .

# Copy dashboard
COPY dashboard.html static/index.html

EXPOSE 7860

CMD ["python", "serve.py"]
|
models.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Data models for the Stack Doctor Environment.
|
| 3 |
+
|
| 4 |
+
An overseer LLM diagnoses sick inference stacks by probing subsystems,
|
| 5 |
+
reconciling conflicting specialist-agent reports, and selecting the
|
| 6 |
+
minimal correct fix.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from pydantic import Field
|
| 10 |
+
|
| 11 |
+
from openenv.core.env_server.types import Action, Observation
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class StackDoctorAction(Action):
    """Agent action — a JSON message selecting one of 4 action types."""

    # Raw JSON-encoded action string. The environment parses and validates it
    # server-side (StackDoctorEnvironment.step), so malformed JSON is handled
    # there rather than by pydantic.
    message: str = Field(
        ...,
        description=(
            'JSON action. One of:\n'
            ' {"type":"inspect","target":"logs|config|snippet|metrics"}\n'
            ' {"type":"ask_specialist","specialist":"runtime|dispatch|kernel|loader"}\n'
            ' {"type":"apply_fix","fix":"relax_arch_check|add_whitelist_entry|fix_runtime_path|switch_backend|update_model_config|fix_weight_mapping|tune_memory_config|fix_quantization|fix_comm_config|update_driver_config"}\n'
            ' {"type":"submit","root_cause":"...","fix":"...","justification":"..."}'
        ),
    )
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class StackDoctorObservation(Observation):
    """What the agent sees after each action."""

    # Natural-language feedback for the most recent action.
    output: str = Field(default="", description="Natural-language feedback")
    incident_ticket: str = Field(default="", description="The incident description")
    hardware: str = Field(default="", description="Hardware identifier")
    model_name: str = Field(default="", description="Model being served")
    backend: str = Field(default="", description="Inference backend in use")
    log_excerpt: str = Field(default="", description="Log snippet")
    code_snippet: str = Field(default="", description="Config or code snippet")
    # Maps specialist name -> {"opinion": ..., "confidence": ...}; some
    # specialists may be wrong by design (see the system prompts in serve.py).
    specialist_opinions: dict = Field(default_factory=dict, description="Specialist name -> {opinion, confidence}")
    # Default 6 matches the episode step budget (MAX_STEPS in the environment).
    steps_remaining: int = Field(default=6, description="Steps left in episode")
    # apply_fix is a once-per-episode action; this flags whether it was spent.
    fix_used: bool = Field(default=False, description="Whether apply_fix has been used")
|
serve.py
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Unified server for HF Spaces: environment + inference + dashboard on port 7860."""
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import os
|
| 5 |
+
import sys
|
| 6 |
+
import time
|
| 7 |
+
import threading
|
| 8 |
+
|
| 9 |
+
sys.path.insert(0, "/app")
|
| 10 |
+
|
| 11 |
+
from fastapi import FastAPI, Request
|
| 12 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 13 |
+
from fastapi.responses import FileResponse, JSONResponse
|
| 14 |
+
import uvicorn
|
| 15 |
+
|
| 16 |
+
from server.app import app as env_app
|
| 17 |
+
|
| 18 |
+
env_app.add_middleware(
|
| 19 |
+
CORSMiddleware,
|
| 20 |
+
allow_origins=["*"],
|
| 21 |
+
allow_methods=["*"],
|
| 22 |
+
allow_headers=["*"],
|
| 23 |
+
)
|
| 24 |
+
|
| 25 |
+
# Model state (loaded in background)
|
| 26 |
+
MODEL_STATE = {"model": None, "tokenizer": None, "ready": False, "error": None}
|
| 27 |
+
|
| 28 |
+
UNTRAINED_SYSTEM = (
|
| 29 |
+
"You are Stack Doctor, an expert AI agent that diagnoses inference-stack incidents.\n"
|
| 30 |
+
"You receive an incident ticket with hardware/model/backend context, log excerpts, and specialist opinions.\n"
|
| 31 |
+
"Some specialists may be wrong. Output a JSON array of actions:\n"
|
| 32 |
+
' {"type":"inspect","target":"logs|config|snippet|metrics"}\n'
|
| 33 |
+
' {"type":"ask_specialist","specialist":"runtime|dispatch|kernel|loader"}\n'
|
| 34 |
+
' {"type":"apply_fix","fix":"<fix_name>"}\n'
|
| 35 |
+
' {"type":"submit","root_cause":"<cause>","fix":"<fix>","justification":"<why>"}'
|
| 36 |
+
)
|
| 37 |
+
|
| 38 |
+
TRAINED_SYSTEM = (
|
| 39 |
+
"You are Stack Doctor, an expert AI agent that diagnoses inference-stack incidents.\n"
|
| 40 |
+
"You are methodical: first inspect logs and config, then query specialists to cross-verify (some lie), then apply a fix and submit.\n\n"
|
| 41 |
+
"Available actions (output as a JSON array):\n"
|
| 42 |
+
' {"type":"inspect","target":"logs"} or "config" or "snippet" or "metrics"\n'
|
| 43 |
+
' {"type":"ask_specialist","specialist":"runtime"} or "dispatch" or "kernel" or "loader"\n'
|
| 44 |
+
' {"type":"apply_fix","fix":"<name>"} -- available fixes: add_whitelist_entry, fix_comm_config, fix_quantization, fix_runtime_path, fix_weight_mapping, relax_arch_check, switch_backend, tune_memory_config, update_driver_config, update_model_config\n'
|
| 45 |
+
' {"type":"submit","root_cause":"<cause>","fix":"<fix>","justification":"<detailed reasoning>"}\n\n'
|
| 46 |
+
"Available root causes: arch_guard, backend_selector, backend_whitelist, distributed_comm, driver_compat, memory_oom, model_config, quantization_error, runtime_loader, weight_layout\n\n"
|
| 47 |
+
"IMPORTANT: Pick ONE target per inspect, ONE specialist per query. Investigate before submitting. Give a detailed justification.\n\n"
|
| 48 |
+
"Example output:\n"
|
| 49 |
+
'[{"type":"inspect","target":"logs"},{"type":"inspect","target":"config"},{"type":"ask_specialist","specialist":"kernel"},'
|
| 50 |
+
'{"type":"apply_fix","fix":"relax_arch_check"},'
|
| 51 |
+
'{"type":"submit","root_cause":"arch_guard","fix":"relax_arch_check","justification":"Logs show architecture check failure for SM90. Config confirms guard enabled. Kernel specialist confirmed not a kernel issue."}]'
|
| 52 |
+
)
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def load_model_background():
    """Load Qwen 1.5B in a background thread so the server starts fast.

    Publishes the model/tokenizer into the module-level MODEL_STATE dict;
    on failure, records the error string for /model_status to report.
    """
    try:
        print("[Model] Loading Qwen2.5-1.5B-Instruct (CPU)...")
        t0 = time.time()
        # Heavy imports are deferred into the worker thread so they do not
        # delay server startup.
        from transformers import AutoModelForCausalLM, AutoTokenizer
        import torch

        model_name = "Qwen/Qwen2.5-1.5B-Instruct"
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float32,  # full precision; CPU-only deployment
            device_map="cpu",
        )

        # "ready" is set last so readers never observe ready=True while the
        # model/tokenizer entries are still missing.
        MODEL_STATE["model"] = model
        MODEL_STATE["tokenizer"] = tokenizer
        MODEL_STATE["ready"] = True
        print(f"[Model] Loaded in {time.time()-t0:.1f}s")
    except Exception as ex:
        # Broad catch on purpose: record the failure for /model_status instead
        # of letting an unhandled exception silently kill the daemon thread.
        MODEL_STATE["error"] = str(ex)
        print(f"[Model] Failed to load: {ex}")
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
threading.Thread(target=load_model_background, daemon=True).start()
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
@env_app.post("/generate")
async def generate_endpoint(request: Request):
    """Generate a model response for the dashboard.

    Request JSON: {"prompt": str, "max_tokens": int (default 512),
    "mode": "trained" | "untrained"} — the mode only selects which system
    prompt is prepended.

    Returns {"text": ..., "gen_time": seconds}; 503 while the model is still
    loading, 500 if the background load failed.

    NOTE(review): model.generate runs synchronously inside this async handler,
    so the event loop is blocked for the whole generation — consider
    fastapi.concurrency.run_in_threadpool if other endpoints must stay
    responsive. Also, max_tokens is taken from the client unvalidated.
    """
    body = await request.json()
    prompt_text = body.get("prompt", "")
    max_tokens = body.get("max_tokens", 512)
    mode = body.get("mode", "untrained")

    # Background loader (load_model_background) may not be finished yet.
    if not MODEL_STATE["ready"]:
        if MODEL_STATE["error"]:
            return JSONResponse({"error": MODEL_STATE["error"]}, status_code=500)
        return JSONResponse({"error": "Model still loading, please wait..."}, status_code=503)

    model = MODEL_STATE["model"]
    tokenizer = MODEL_STATE["tokenizer"]
    system = TRAINED_SYSTEM if mode == "trained" else UNTRAINED_SYSTEM

    messages = [
        {"role": "system", "content": system},
        {"role": "user", "content": prompt_text},
    ]

    # torch is imported lazily: it is only guaranteed importable once the
    # background loader has run, and this path is gated on MODEL_STATE["ready"].
    import torch

    text_input = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(text_input, return_tensors="pt")

    t0 = time.time()
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Strip the prompt tokens; decode only the newly generated suffix.
    new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    text = tokenizer.decode(new_tokens, skip_special_tokens=True)
    gen_time = time.time() - t0
    print(f"[Model] Generated {len(text)} chars in {gen_time:.1f}s (mode={mode})")
    return JSONResponse({"text": text, "gen_time": gen_time})
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
@env_app.get("/model_status")
async def model_status():
    """Report whether the background model load has finished (or failed)."""
    status = {
        "ready": MODEL_STATE["ready"],
        "error": MODEL_STATE["error"],
    }
    return JSONResponse(status)
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
@env_app.get("/", include_in_schema=False)
async def root():
    """Serve the dashboard HTML at the site root (excluded from the OpenAPI schema)."""
    # Path matches the Dockerfile's `COPY dashboard.html static/index.html`.
    return FileResponse("/app/static/index.html")
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
if __name__ == "__main__":
|
| 143 |
+
uvicorn.run(env_app, host="0.0.0.0", port=7860)
|
server/__init__.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Stack Doctor environment server components."""
|
| 2 |
+
|
| 3 |
+
from .stack_doctor_environment import StackDoctorEnvironment
|
| 4 |
+
|
| 5 |
+
__all__ = ["StackDoctorEnvironment"]
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def get_mcp_environment():
    """Lazy import of MCP environment (requires fastapi/uvicorn).

    Returns the StackDoctorMCPEnvironment class itself (not an instance), so
    importing this package stays cheap for callers that never use MCP.
    """
    from .stack_doctor_mcp import StackDoctorMCPEnvironment
    return StackDoctorMCPEnvironment
|
server/app.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
FastAPI application for the Stack Doctor Environment.
|
| 3 |
+
|
| 4 |
+
Exposes both:
|
| 5 |
+
- WebSocket API (reset/step/state) for RL training
|
| 6 |
+
- MCP API (tools/list, tools/call) for agent interaction
|
| 7 |
+
|
| 8 |
+
Usage:
|
| 9 |
+
uvicorn server.app:app --reload --host 0.0.0.0 --port 8000
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
try:
|
| 13 |
+
from openenv.core.env_server.http_server import create_app
|
| 14 |
+
except Exception as e:
|
| 15 |
+
raise ImportError(
|
| 16 |
+
"openenv is required. Install with: uv sync"
|
| 17 |
+
) from e
|
| 18 |
+
|
| 19 |
+
from models import StackDoctorAction, StackDoctorObservation
|
| 20 |
+
from .stack_doctor_mcp import StackDoctorMCPEnvironment
|
| 21 |
+
|
| 22 |
+
app = create_app(
|
| 23 |
+
StackDoctorMCPEnvironment,
|
| 24 |
+
StackDoctorAction,
|
| 25 |
+
StackDoctorObservation,
|
| 26 |
+
env_name="stack_doctor",
|
| 27 |
+
max_concurrent_envs=4,
|
| 28 |
+
)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def main(host: str = "0.0.0.0", port: int = 8000):
    """Serve the Stack Doctor FastAPI app with uvicorn.

    Args:
        host: Interface to bind to.
        port: TCP port to listen on.
    """
    import uvicorn
    uvicorn.run(app, host=host, port=port)


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description="Stack Doctor environment server")
    # Fix: main() already accepted a host, but the CLI previously only exposed
    # --port, making the bind address impossible to override from the shell.
    parser.add_argument("--host", default="0.0.0.0")
    parser.add_argument("--port", type=int, default=8000)
    args = parser.parse_args()
    main(host=args.host, port=args.port)
|
server/baselines.py
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Oracle, heuristic, and random baselines for Stack Doctor.
|
| 3 |
+
|
| 4 |
+
Used to validate the reward function: random < heuristic < oracle must hold.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
import json
|
| 10 |
+
import random
|
| 11 |
+
|
| 12 |
+
from .scenarios import (
|
| 13 |
+
ROOT_CAUSE_TO_FIX,
|
| 14 |
+
ROOT_CAUSES,
|
| 15 |
+
FIXES,
|
| 16 |
+
SPECIALISTS,
|
| 17 |
+
Scenario,
|
| 18 |
+
SCENARIOS,
|
| 19 |
+
TRAIN_SCENARIOS,
|
| 20 |
+
EVAL_SCENARIOS,
|
| 21 |
+
)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def oracle_policy(scenario: Scenario) -> list[dict]:
    """Perfect policy: submit correct answer in 1 step."""
    submit = {
        "type": "submit",
        "root_cause": scenario.root_cause,
        "fix": scenario.correct_fix,
        "justification": f"Root cause is {scenario.root_cause}, applying the correct fix.",
    }
    return [submit]
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def heuristic_policy(scenario: Scenario) -> list[dict]:
    """
    Reasonable heuristic: inspect logs, ask the highest-confidence specialist,
    then submit based on clues.

    Uses keyword matching on specialist opinions and logs to guess root cause.
    """
    # The specialist we trust most is the one reporting the highest confidence.
    top_name, top_opinion = max(
        scenario.specialist_opinions.items(),
        key=lambda item: item[1].confidence,
    )

    # Pool all available evidence and guess the root cause by keyword match.
    evidence = (
        scenario.incident_ticket
        + " " + scenario.initial_log
        + " " + top_opinion.opinion
    ).lower()
    guess = _keyword_guess(evidence)
    fix = ROOT_CAUSE_TO_FIX[guess]

    # Scripted episode: inspect, consult, fix, submit.
    return [
        {"type": "inspect", "target": "logs"},
        {"type": "ask_specialist", "specialist": top_name},
        {"type": "apply_fix", "fix": fix},
        {
            "type": "submit",
            "root_cause": guess,
            "fix": fix,
            "justification": f"Keyword analysis of logs and specialist opinions points to {guess}.",
        },
    ]
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def random_policy(scenario: Scenario) -> list[dict]:
    """Random policy: random actions, random submit."""
    # Same RNG call order as the original: episode length first, then one
    # draw for the action kind plus one for its argument, per step.
    n_steps = random.randint(1, 5)
    actions: list[dict] = []

    for _ in range(n_steps - 1):
        kind = random.choice(["inspect", "ask_specialist"])
        if kind == "inspect":
            target = random.choice(["logs", "config", "snippet", "metrics"])
            actions.append({"type": "inspect", "target": target})
        else:
            specialist = random.choice(SPECIALISTS)
            actions.append({"type": "ask_specialist", "specialist": specialist})

    # Final step is always a submit with a random (but internally consistent)
    # root cause / fix pair.
    cause = random.choice(ROOT_CAUSES)
    actions.append({
        "type": "submit",
        "root_cause": cause,
        "fix": ROOT_CAUSE_TO_FIX[cause],
    })
    return actions
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
# Keyword evidence table: root cause -> indicative substrings. All entries are
# lower-case; _keyword_guess lower-cases its input so matching is
# case-insensitive. (The previous hand-rolled version listed "hipError", which
# could never match the lower-cased text callers pass in — fixed here.)
_ROOT_CAUSE_KEYWORDS: dict[str, list[str]] = {
    "arch_guard": ["arch", "architecture", "sm_12", "sm_120", "sm_121", "supported_arch", "capability", "is_supported"],
    "backend_whitelist": ["whitelist", "supported_gpu", "not in", "marlin", "awq", "gpu name"],
    "runtime_loader": ["runtime", "libcuda", "ld_library", "cuda_home", "symlink", "shared object", "rocm_path", "hiperror"],
    "backend_selector": ["backend", "selector", "xformers", "flash_attn", "latency", "slow", "e4m3fn", "fp8 format"],
    "model_config": ["config", "num_expert", "shape mismatch", "rope", "checkpoint", "config.json"],
    "weight_layout": ["weight", "mapping", "swap", "gate_proj", "up_proj", "convert", "layout", "qkv"],
    "memory_oom": ["out of memory", "oom", "kv_cache", "memory", "max_model_len", "batch size", "vram"],
    "quantization_error": ["quantiz", "fp8", "int4", "nf4", "calibrat", "precision", "scale factor", "gptq"],
    "distributed_comm": ["nccl", "tensor parallel", "all_reduce", "rdma", "pipeline parallel", "collective", "rank"],
    "driver_compat": ["driver", "cudnn", "toolkit", "nvcc", "cuda version", "driver version", "libcudnn"],
}


def _keyword_guess(text: str) -> str:
    """Guess root cause from keyword presence in text.

    Args:
        text: Free-form evidence (ticket + logs + specialist opinion). Matching
            is case-insensitive (the input is lower-cased defensively here,
            even though callers already lower-case it).

    Returns:
        The root cause with the most keyword hits. Ties break in ROOT_CAUSES
        order, matching the original per-cause scoring loops.
    """
    text = text.lower()
    # Seed from ROOT_CAUSES (not the table) so tie-break order and the set of
    # candidate causes stay exactly as before.
    scores = {rc: 0 for rc in ROOT_CAUSES}
    for rc, keywords in _ROOT_CAUSE_KEYWORDS.items():
        scores[rc] = sum(1 for kw in keywords if kw in text)
    return max(scores, key=scores.get)
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
def evaluate_policy(policy_fn, scenarios: list[Scenario], n_runs: int = 1) -> dict:
    """
    Run a policy across scenarios and compute metrics.

    Args:
        policy_fn: Callable taking a Scenario and returning a scripted list of
            action dicts (e.g. oracle_policy / heuristic_policy / random_policy).
        scenarios: Scenarios to evaluate on (each gets a fresh environment).
        n_runs: Number of passes over the scenario list.

    Returns dict with:
    - rc_accuracy: fraction of correct root cause submissions
    - fix_accuracy: fraction of correct fix submissions
    - avg_steps: average steps to resolution
    - avg_reward: average cumulative reward
    """
    # Local imports: StackDoctorEnvironment pulls in openenv, which this
    # module does not otherwise need at import time.
    from .stack_doctor_environment import StackDoctorEnvironment
    from models import StackDoctorAction

    total_rc_correct = 0
    total_fix_correct = 0
    total_steps = 0
    total_reward = 0.0
    total_episodes = 0

    for _ in range(n_runs):
        for scenario in scenarios:
            env = StackDoctorEnvironment()
            env.reset(scenario_id=scenario.id)

            actions = policy_fn(scenario)
            cumulative = 0.0
            steps = 0

            # Replay the scripted actions; stop early if the environment
            # terminates the episode (e.g. after a submit).
            for action_dict in actions:
                obs = env.step(StackDoctorAction(message=json.dumps(action_dict)))
                cumulative += obs.reward
                steps += 1
                if obs.done:
                    break

            # Check if submit happened
            # NOTE(review): accuracy is judged from the policy's *last scripted*
            # action, which presumes the episode actually reached it — confirm
            # this holds when the env ends an episode before the script is
            # exhausted.
            last_action = actions[-1] if actions else {}
            if last_action.get("type") == "submit":
                if last_action["root_cause"] == scenario.root_cause:
                    total_rc_correct += 1
                if last_action["fix"] == scenario.correct_fix:
                    total_fix_correct += 1

            total_steps += steps
            total_reward += cumulative
            total_episodes += 1

    return {
        "rc_accuracy": total_rc_correct / total_episodes if total_episodes else 0,
        "fix_accuracy": total_fix_correct / total_episodes if total_episodes else 0,
        "avg_steps": total_steps / total_episodes if total_episodes else 0,
        "avg_reward": total_reward / total_episodes if total_episodes else 0,
        "n_episodes": total_episodes,
    }
|
server/scenarios.py
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
server/stack_doctor_environment.py
ADDED
|
@@ -0,0 +1,279 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Stack Doctor Environment.
|
| 3 |
+
|
| 4 |
+
An overseer LLM diagnoses sick inference stacks by probing subsystems,
|
| 5 |
+
reconciling conflicting specialist-agent reports, and selecting the
|
| 6 |
+
minimal correct fix.
|
| 7 |
+
|
| 8 |
+
Inspired by real SM12x enablement bugs across vLLM, FlashInfer, SGLang,
|
| 9 |
+
CUTLASS, and Flash-Attention.
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
from __future__ import annotations
|
| 13 |
+
|
| 14 |
+
import json
|
| 15 |
+
from uuid import uuid4
|
| 16 |
+
|
| 17 |
+
from openenv.core.env_server.interfaces import Environment
|
| 18 |
+
from openenv.core.env_server.types import State
|
| 19 |
+
|
| 20 |
+
from models import StackDoctorAction, StackDoctorObservation
|
| 21 |
+
from .scenarios import (
|
| 22 |
+
ROOT_CAUSE_TO_FIX,
|
| 23 |
+
FIX_TO_ROOT_CAUSE,
|
| 24 |
+
ROOT_CAUSES,
|
| 25 |
+
FIXES,
|
| 26 |
+
SPECIALISTS,
|
| 27 |
+
Scenario,
|
| 28 |
+
SpecialistOpinion,
|
| 29 |
+
get_scenario,
|
| 30 |
+
randomize_specialist_opinions,
|
| 31 |
+
)
|
| 32 |
+
|
| 33 |
+
MAX_STEPS = 6
|
| 34 |
+
|
| 35 |
+
INSPECT_TARGETS = {"logs", "config", "snippet", "metrics"}
|
| 36 |
+
VALID_FIXES = set(FIXES)
|
| 37 |
+
VALID_ROOT_CAUSES = set(ROOT_CAUSES)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
class EpisodeState:
    """Internal mutable episode state (not exposed to agent)."""

    def __init__(
        self,
        scenario: Scenario,
        specialist_opinions: dict[str, SpecialistOpinion] | None = None,
    ):
        # Ground-truth scenario being diagnosed this episode.
        self.scenario = scenario
        # Per-episode randomized specialist opinions (falls back to scenario defaults)
        self.specialist_opinions = specialist_opinions or scenario.specialist_opinions
        # Agent steps taken so far.
        self.step_count = 0
        # apply_fix may be used at most once per episode.
        self.fix_applied = False
        # None until a fix is applied; then whether it matched scenario.correct_fix.
        self.fix_was_correct: bool | None = None
        # True once the episode has terminated.
        self.done = False
        # Running sum of per-step rewards.
        self.cumulative_reward = 0.0
        # Audit trail of parsed/validated actions, for debugging and metrics.
        self.actions_taken: list[dict] = []
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
class StackDoctorEnvironment(Environment):
|
| 60 |
+
"""
|
| 61 |
+
Stack Doctor: incident-response RL environment for
|
| 62 |
+
inference-stack diagnosis.
|
| 63 |
+
"""
|
| 64 |
+
|
| 65 |
+
SUPPORTS_CONCURRENT_SESSIONS: bool = True
|
| 66 |
+
|
| 67 |
+
def __init__(self):
|
| 68 |
+
self._state = State(episode_id=str(uuid4()), step_count=0)
|
| 69 |
+
self._episode: EpisodeState | None = None
|
| 70 |
+
|
| 71 |
+
def reset(self, seed=None, episode_id=None, **kwargs) -> StackDoctorObservation:
|
| 72 |
+
scenario_id = kwargs.get("scenario_id")
|
| 73 |
+
split = kwargs.get("split", "train")
|
| 74 |
+
scenario = get_scenario(scenario_id, split=split)
|
| 75 |
+
|
| 76 |
+
self._state = State(
|
| 77 |
+
episode_id=episode_id or str(uuid4()),
|
| 78 |
+
step_count=0,
|
| 79 |
+
)
|
| 80 |
+
randomized_opinions = randomize_specialist_opinions(scenario)
|
| 81 |
+
self._episode = EpisodeState(scenario, specialist_opinions=randomized_opinions)
|
| 82 |
+
|
| 83 |
+
specialist_obs = {}
|
| 84 |
+
for name, op in randomized_opinions.items():
|
| 85 |
+
specialist_obs[name] = {
|
| 86 |
+
"opinion": op.opinion,
|
| 87 |
+
"confidence": op.confidence,
|
| 88 |
+
}
|
| 89 |
+
|
| 90 |
+
return StackDoctorObservation(
|
| 91 |
+
output=(
|
| 92 |
+
"STACK DOCTOR — New incident assigned.\n"
|
| 93 |
+
"Diagnose the root cause, optionally apply a fix, then submit your diagnosis.\n"
|
| 94 |
+
"You have 6 steps. Use them wisely.\n\n"
|
| 95 |
+
"Available actions (send as JSON):\n"
|
| 96 |
+
' {"type":"inspect","target":"logs|config|snippet|metrics"}\n'
|
| 97 |
+
' {"type":"ask_specialist","specialist":"runtime|dispatch|kernel|loader"}\n'
|
| 98 |
+
' {"type":"apply_fix","fix":"relax_arch_check|add_whitelist_entry|fix_runtime_path|switch_backend|update_model_config|fix_weight_mapping|tune_memory_config|fix_quantization|fix_comm_config|update_driver_config"}\n'
|
| 99 |
+
' {"type":"submit","root_cause":"...","fix":"...","justification":"reason for diagnosis"}\n'
|
| 100 |
+
),
|
| 101 |
+
incident_ticket=scenario.incident_ticket,
|
| 102 |
+
hardware=scenario.hardware,
|
| 103 |
+
model_name=scenario.model_name,
|
| 104 |
+
backend=scenario.backend,
|
| 105 |
+
log_excerpt=scenario.initial_log,
|
| 106 |
+
code_snippet=scenario.initial_snippet,
|
| 107 |
+
specialist_opinions=specialist_obs,
|
| 108 |
+
steps_remaining=MAX_STEPS,
|
| 109 |
+
fix_used=False,
|
| 110 |
+
done=False,
|
| 111 |
+
reward=0.0,
|
| 112 |
+
)
|
| 113 |
+
|
| 114 |
+
def step(self, action: StackDoctorAction, **kwargs) -> StackDoctorObservation:
|
| 115 |
+
ep = self._episode
|
| 116 |
+
if ep is None or ep.done:
|
| 117 |
+
return self._terminal_obs("Episode is over. Call reset() to start a new incident.", 0.0)
|
| 118 |
+
|
| 119 |
+
self._state.step_count += 1
|
| 120 |
+
ep.step_count += 1
|
| 121 |
+
|
| 122 |
+
try:
|
| 123 |
+
parsed = json.loads(action.message)
|
| 124 |
+
except (json.JSONDecodeError, TypeError):
|
| 125 |
+
return self._handle_invalid(ep, f"Invalid JSON: {action.message[:200]}")
|
| 126 |
+
|
| 127 |
+
action_type = parsed.get("type")
|
| 128 |
+
|
| 129 |
+
if action_type == "inspect":
|
| 130 |
+
return self._handle_inspect(ep, parsed)
|
| 131 |
+
elif action_type == "ask_specialist":
|
| 132 |
+
return self._handle_ask_specialist(ep, parsed)
|
| 133 |
+
elif action_type == "apply_fix":
|
| 134 |
+
return self._handle_apply_fix(ep, parsed)
|
| 135 |
+
elif action_type == "submit":
|
| 136 |
+
return self._handle_submit(ep, parsed)
|
| 137 |
+
else:
|
| 138 |
+
return self._handle_invalid(ep, f"Unknown action type: {action_type}")
|
| 139 |
+
|
| 140 |
+
@property
|
| 141 |
+
def state(self) -> State:
|
| 142 |
+
return self._state
|
| 143 |
+
|
| 144 |
+
def _handle_inspect(self, ep: EpisodeState, parsed: dict) -> StackDoctorObservation:
|
| 145 |
+
target = parsed.get("target")
|
| 146 |
+
if target not in INSPECT_TARGETS:
|
| 147 |
+
return self._handle_invalid(ep, f"Invalid inspect target: {target}. Use: {INSPECT_TARGETS}")
|
| 148 |
+
|
| 149 |
+
reward = -0.25
|
| 150 |
+
ep.cumulative_reward += reward
|
| 151 |
+
ep.actions_taken.append({"type": "inspect", "target": target})
|
| 152 |
+
|
| 153 |
+
ir = ep.scenario.inspect_results
|
| 154 |
+
result_map = {"logs": ir.logs, "config": ir.config, "snippet": ir.snippet, "metrics": ir.metrics}
|
| 155 |
+
|
| 156 |
+
return self._step_obs(ep, output=f"[INSPECT {target.upper()}]\n{result_map[target]}", reward=reward)
|
| 157 |
+
|
| 158 |
+
def _handle_ask_specialist(self, ep: EpisodeState, parsed: dict) -> StackDoctorObservation:
    """Return the named specialist's follow-up analysis for this scenario.

    Charges the flat -0.25 probe cost and records the action.
    """
    specialist = parsed.get("specialist")
    if specialist not in SPECIALISTS:
        return self._handle_invalid(ep, f"Invalid specialist: {specialist}. Use: {SPECIALISTS}")

    # Missing follow-ups fall back to a neutral answer rather than erroring.
    answer = ep.scenario.specialist_followups.get(specialist, "No additional information.")

    cost = -0.25
    ep.cumulative_reward += cost
    ep.actions_taken.append({"type": "ask_specialist", "specialist": specialist})

    return self._step_obs(ep, output=f"[SPECIALIST: {specialist.upper()}]\n{answer}", reward=cost)
|
| 169 |
+
|
| 170 |
+
def _handle_apply_fix(self, ep: EpisodeState, parsed: dict) -> StackDoctorObservation:
    """Apply the (single allowed) fix for this episode.

    Correct fix: +3.0, wrong fix: -2.0. A second attempt in the same
    episode is charged as an invalid action.
    """
    if ep.fix_applied:
        return self._handle_invalid(ep, "apply_fix already used this episode. You can only apply one fix.")

    choice = parsed.get("fix")
    if choice not in VALID_FIXES:
        return self._handle_invalid(ep, f"Invalid fix: {choice}. Use one of: {sorted(VALID_FIXES)}")

    ep.fix_applied = True
    hit = choice == ep.scenario.correct_fix
    ep.fix_was_correct = hit

    payoff = 3.0 if hit else -2.0
    ep.cumulative_reward += payoff
    ep.actions_taken.append({"type": "apply_fix", "fix": choice, "correct": hit})

    message = (
        f"[FIX APPLIED: {choice}] Fix applied successfully. Systems recovering. Now submit your diagnosis."
        if hit
        else f"[FIX APPLIED: {choice}] Fix applied but the issue persists. Consider your diagnosis carefully."
    )
    return self._step_obs(ep, output=message, reward=payoff)
|
| 192 |
+
|
| 193 |
+
def _handle_submit(self, ep: EpisodeState, parsed: dict) -> StackDoctorObservation:
    """Score a final diagnosis and terminate the episode.

    Scoring: +8/-4 for the root cause, +8/-4 for the fix, +2 efficiency
    bonus when both are correct within 4 steps, +1 for a non-trivial
    justification. Validation failures do NOT end the episode; they are
    routed to ``_handle_invalid`` instead.
    """
    root_cause = parsed.get("root_cause")
    fix = parsed.get("fix")
    justification = parsed.get("justification", "")

    if root_cause not in VALID_ROOT_CAUSES:
        return self._handle_invalid(ep, f"Invalid root_cause: {root_cause}. Use one of: {sorted(VALID_ROOT_CAUSES)}")
    if fix not in VALID_FIXES:
        return self._handle_invalid(ep, f"Invalid fix: {fix}. Use one of: {sorted(VALID_FIXES)}")

    ep.done = True
    correct_rc = ep.scenario.root_cause
    correct_fix = ep.scenario.correct_fix
    rc_correct = root_cause == correct_rc
    fix_correct = fix == correct_fix
    # A justification shorter than 10 characters (after trimming) counts as missing.
    has_justification = len(justification.strip()) >= 10

    reward = 0.0
    reward += 8.0 if rc_correct else -4.0
    reward += 8.0 if fix_correct else -4.0
    # step_count was already incremented for this submit action in step().
    if (rc_correct and fix_correct) and ep.step_count <= 4:
        reward += 2.0
    if has_justification:
        reward += 1.0

    ep.cumulative_reward += reward
    ep.actions_taken.append({
        "type": "submit", "root_cause": root_cause, "fix": fix,
        "justification": justification,
        "rc_correct": rc_correct, "fix_correct": fix_correct,
        "has_justification": has_justification,
    })

    # Human-readable scorecard returned as the terminal observation text.
    output_lines = ["[DIAGNOSIS SUBMITTED]"]
    output_lines.append(f" Root cause: {root_cause} — {'CORRECT' if rc_correct else 'WRONG (was: ' + correct_rc + ')'}")
    output_lines.append(f" Fix: {fix} — {'CORRECT' if fix_correct else 'WRONG (was: ' + correct_fix + ')'}")
    if has_justification:
        output_lines.append(f" Justification: {justification.strip()}")
        output_lines.append(" JUSTIFICATION BONUS: +1")
    else:
        output_lines.append(" No justification provided (missed +1 bonus)")
    output_lines.append(f" Steps used: {ep.step_count}/{MAX_STEPS}")
    if rc_correct and fix_correct and ep.step_count <= 4:
        output_lines.append(" EFFICIENCY BONUS: +2 (solved in <= 4 steps)")
    output_lines.append(f" Episode reward: {ep.cumulative_reward:.2f}")

    return self._terminal_obs("\n".join(output_lines), reward)
|
| 240 |
+
|
| 241 |
+
def _handle_invalid(self, ep: EpisodeState, msg: str) -> StackDoctorObservation:
    """Charge the -2.0 invalid-action penalty; auto-fail if the budget is gone."""
    penalty = -2.0
    ep.cumulative_reward += penalty
    ep.actions_taken.append({"type": "invalid", "message": msg})

    budget_exhausted = ep.step_count >= MAX_STEPS
    if not budget_exhausted:
        return self._step_obs(ep, output=f"[INVALID ACTION] {msg}", reward=penalty)

    # Out of steps: close the episode with an auto-fail terminal observation.
    ep.done = True
    return self._terminal_obs(f"[INVALID ACTION] {msg}\n[EPISODE OVER] Max steps reached. Auto-fail.", penalty)
|
| 251 |
+
|
| 252 |
+
def _step_obs(self, ep: EpisodeState, output: str, reward: float) -> StackDoctorObservation:
    """Build a non-terminal observation; auto-fail when the step budget is spent.

    If this step exhausted the budget without a submission, the episode is
    closed here and an extra -4.0 timeout penalty is folded into BOTH the
    returned reward and the episode's cumulative total.
    """
    remaining = MAX_STEPS - ep.step_count
    if remaining <= 0 and not ep.done:
        ep.done = True
        timeout_penalty = -4.0
        reward += timeout_penalty
        ep.cumulative_reward += timeout_penalty
        output += "\n\n[EPISODE OVER] Max steps reached without submission. Auto-fail. Reward: -4"

    # log_excerpt / code_snippet / specialist_opinions are only populated on
    # reset; step observations carry their content in `output` instead.
    return StackDoctorObservation(
        output=output, incident_ticket=ep.scenario.incident_ticket,
        hardware=ep.scenario.hardware, model_name=ep.scenario.model_name,
        backend=ep.scenario.backend, log_excerpt="", code_snippet="",
        specialist_opinions={}, steps_remaining=remaining, fix_used=ep.fix_applied,
        done=ep.done, reward=reward,
        metadata={"cumulative_reward": ep.cumulative_reward, "step": ep.step_count, "scenario_id": ep.scenario.id},
    )
|
| 269 |
+
|
| 270 |
+
def _terminal_obs(self, output: str, reward: float) -> StackDoctorObservation:
    """Build a terminal (done=True) observation.

    Guards every field access because this can be called before any episode
    exists (e.g. stepping a finished/unstarted environment).
    """
    ep = self._episode
    return StackDoctorObservation(
        output=output, incident_ticket=ep.scenario.incident_ticket if ep else "",
        hardware=ep.scenario.hardware if ep else "", model_name=ep.scenario.model_name if ep else "",
        backend=ep.scenario.backend if ep else "", log_excerpt="", code_snippet="",
        specialist_opinions={}, steps_remaining=0, fix_used=ep.fix_applied if ep else False,
        done=True, reward=reward,
        metadata={"cumulative_reward": ep.cumulative_reward if ep else 0.0, "step": ep.step_count if ep else 0, "scenario_id": ep.scenario.id if ep else ""},
    )
|
server/stack_doctor_mcp.py
ADDED
|
@@ -0,0 +1,396 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Stack Doctor MCP Environment.
|
| 3 |
+
|
| 4 |
+
Wraps the core Stack Doctor environment with MCP tools that agents
|
| 5 |
+
can discover and invoke. This is the agent-facing interface —
|
| 6 |
+
agents call tools like read_log(), query_specialist(), submit_diagnosis()
|
| 7 |
+
instead of constructing JSON action strings.
|
| 8 |
+
|
| 9 |
+
The training (WebSocket) API still works through _step_impl().
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
from __future__ import annotations
|
| 13 |
+
|
| 14 |
+
import json
|
| 15 |
+
from typing import Any, Optional
|
| 16 |
+
from uuid import uuid4
|
| 17 |
+
|
| 18 |
+
from mcp.server.fastmcp import FastMCP
|
| 19 |
+
from openenv.core.env_server.mcp_environment import MCPEnvironment
|
| 20 |
+
from openenv.core.env_server.types import Action, Observation, State
|
| 21 |
+
|
| 22 |
+
from models import StackDoctorAction, StackDoctorObservation
|
| 23 |
+
from .scenarios import (
|
| 24 |
+
ROOT_CAUSE_TO_FIX,
|
| 25 |
+
FIX_TO_ROOT_CAUSE,
|
| 26 |
+
ROOT_CAUSES,
|
| 27 |
+
FIXES,
|
| 28 |
+
SPECIALISTS,
|
| 29 |
+
Scenario,
|
| 30 |
+
get_scenario,
|
| 31 |
+
)
|
| 32 |
+
|
| 33 |
+
MAX_STEPS = 6
|
| 34 |
+
VALID_FIXES = set(FIXES)
|
| 35 |
+
VALID_ROOT_CAUSES = set(ROOT_CAUSES)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
class StackDoctorMCPEnvironment(MCPEnvironment):
    """
    Stack Doctor with MCP tool interface for agent interaction.

    Agents discover available tools (read_log, check_config, view_code,
    run_diagnostic, query_specialist, apply_fix, submit_diagnosis) and
    call them to investigate incidents and submit diagnoses.
    """

    # NOTE(review): all episode state lives on `self`; confirm the server
    # actually gives each concurrent session its own instance.
    SUPPORTS_CONCURRENT_SESSIONS: bool = True
|
| 48 |
+
|
| 49 |
+
def __init__(self):
    """Create the FastMCP server, register the diagnostic tools, and
    initialise the per-episode bookkeeping fields."""
    mcp = FastMCP("stack_doctor")
    self._state_obj = State(episode_id=str(uuid4()), step_count=0)
    self._scenario: Scenario | None = None      # active incident, None until reset()
    self._step_count = 0                        # steps consumed this episode
    self._fix_applied = False                   # apply_fix() is one-shot per episode
    self._fix_was_correct: bool | None = None   # None until a fix is attempted
    self._done = False
    self._cumulative_reward = 0.0
    self._actions_taken: list[dict] = []        # audit trail of every action

    env = self  # capture for closures

    # NOTE: the tool docstrings below double as the MCP tool descriptions
    # shown to agents, so they are part of runtime behavior.
    @mcp.tool()
    def read_log() -> str:
        """Read system and application logs for the current incident.
        Returns log output from the affected inference stack including
        error messages, warnings, and system state information.
        Costs 1 step (-0.25 reward)."""
        return env._do_inspect("logs")

    @mcp.tool()
    def check_config() -> str:
        """Check configuration files for the current incident.
        Returns relevant configuration parameters including GPU settings,
        backend configuration, model parameters, and environment variables.
        Costs 1 step (-0.25 reward)."""
        return env._do_inspect("config")

    @mcp.tool()
    def view_code() -> str:
        """View relevant source code snippets for the current incident.
        Returns code from the affected component showing the likely
        location of the bug or misconfiguration.
        Costs 1 step (-0.25 reward)."""
        return env._do_inspect("snippet")

    @mcp.tool()
    def run_diagnostic() -> str:
        """Run performance diagnostics and metrics collection.
        Returns metrics like latency, throughput, GPU utilization,
        error rates, and memory usage for the affected system.
        Costs 1 step (-0.25 reward)."""
        return env._do_inspect("metrics")

    @mcp.tool()
    def query_specialist(specialist: str) -> str:
        """Ask a specialist for their analysis of the incident.
        Specialists: 'runtime', 'dispatch', 'kernel', 'loader'.
        WARNING: At least one specialist gives wrong advice per incident.
        Cross-verify specialist opinions before trusting them.
        Costs 1 step (-0.25 reward)."""
        return env._do_ask_specialist(specialist)

    @mcp.tool()
    def apply_fix(fix: str) -> str:
        """Apply a fix to the system. Can only be used ONCE per incident.
        Available fixes: 'relax_arch_check', 'add_whitelist_entry',
        'fix_runtime_path', 'switch_backend', 'update_model_config',
        'fix_weight_mapping', 'tune_memory_config', 'fix_quantization',
        'fix_comm_config', 'update_driver_config'.
        Correct fix: +3 reward. Wrong fix: -2 reward."""
        return env._do_apply_fix(fix)

    @mcp.tool()
    def submit_diagnosis(root_cause: str, fix: str, justification: str = "") -> str:
        """Submit your final diagnosis. This ends the episode.
        Root causes: 'arch_guard', 'backend_whitelist', 'runtime_loader',
        'backend_selector', 'model_config', 'weight_layout',
        'memory_oom', 'quantization_error', 'distributed_comm', 'driver_compat'.
        Fixes: 'relax_arch_check', 'add_whitelist_entry', 'fix_runtime_path',
        'switch_backend', 'update_model_config', 'fix_weight_mapping',
        'tune_memory_config', 'fix_quantization', 'fix_comm_config', 'update_driver_config'.
        justification: A short sentence explaining WHY you chose this root cause
        and fix based on the evidence you gathered. Bonus +1 if provided.
        Correct root_cause: +8. Wrong: -4. Correct fix: +8. Wrong: -4.
        Bonus +2 if solved in 4 or fewer steps. Bonus +1 for justification."""
        return env._do_submit(root_cause, fix, justification)

    super().__init__(mcp)
|
| 129 |
+
|
| 130 |
+
# ------------------------------------------------------------------
|
| 131 |
+
# MCP tool implementations
|
| 132 |
+
# ------------------------------------------------------------------
|
| 133 |
+
|
| 134 |
+
def _check_episode(self) -> str | None:
    """Return an error message if no episode is active, else None.

    Side effect: flips ``self._done`` to True when the step budget is
    already exhausted, so later calls short-circuit on the done check.
    """
    if self._scenario is None:
        return "No active incident. Call reset() first."
    if self._done:
        return "Episode is over. Call reset() to start a new incident."
    if self._step_count >= MAX_STEPS:
        self._done = True
        return "Max steps reached. Episode over."
    return None
|
| 144 |
+
|
| 145 |
+
def _record_step(self, reward: float, action: dict) -> None:
    """Charge one step: bump counters, accumulate reward, log the action."""
    self._step_count += 1
    # Keep the OpenEnv State object in sync with the internal counter.
    self._state_obj.step_count = self._step_count
    self._cumulative_reward += reward
    self._actions_taken.append(action)
|
| 150 |
+
|
| 151 |
+
def _do_inspect(self, target: str) -> str:
    """Return one inspection artifact ('logs'/'config'/'snippet'/'metrics').

    Charges -0.25 and appends a step-budget footer to the artifact text.
    """
    gate = self._check_episode()
    if gate:
        return gate

    results = self._scenario.inspect_results
    text_by_target = {
        "logs": results.logs,
        "config": results.config,
        "snippet": results.snippet,
        "metrics": results.metrics,
    }

    self._record_step(-0.25, {"type": "inspect", "target": target})

    left = MAX_STEPS - self._step_count
    footer = f"[Steps remaining: {left} | Reward: -0.25 | Cumulative: {self._cumulative_reward:.2f}]"
    return "\n".join([f"[INSPECT {target.upper()}]", text_by_target[target], "", footer])
|
| 172 |
+
|
| 173 |
+
def _do_ask_specialist(self, specialist: str) -> str:
    """Return a specialist's follow-up analysis; penalize unknown names.

    Valid consult: -0.25. Unknown specialist: -2.0 invalid-action penalty.
    """
    gate = self._check_episode()
    if gate:
        return gate

    if specialist not in SPECIALISTS:
        self._record_step(-2.0, {"type": "invalid", "message": f"Unknown specialist: {specialist}"})
        return f"Invalid specialist '{specialist}'. Available: {SPECIALISTS}. Penalty: -2.0"

    # Missing follow-ups fall back to a neutral answer rather than erroring.
    answer = self._scenario.specialist_followups.get(specialist, "No additional information.")
    self._record_step(-0.25, {"type": "ask_specialist", "specialist": specialist})

    left = MAX_STEPS - self._step_count
    footer = f"[Steps remaining: {left} | Reward: -0.25 | Cumulative: {self._cumulative_reward:.2f}]"
    return "\n".join([f"[SPECIALIST: {specialist.upper()}]", answer, "", footer])
|
| 191 |
+
|
| 192 |
+
def _do_apply_fix(self, fix: str) -> str:
    """Apply the one-shot fix. Correct: +3.0. Wrong: -2.0.

    A repeat attempt or an unknown fix name is charged -2.0 as an
    invalid action instead.
    """
    gate = self._check_episode()
    if gate:
        return gate

    if self._fix_applied:
        self._record_step(-2.0, {"type": "invalid", "message": "Fix already applied"})
        return "You already applied a fix this episode. Only one fix allowed. Penalty: -2.0"

    if fix not in VALID_FIXES:
        self._record_step(-2.0, {"type": "invalid", "message": f"Invalid fix: {fix}"})
        return f"Invalid fix '{fix}'. Available: {sorted(VALID_FIXES)}. Penalty: -2.0"

    self._fix_applied = True
    hit = fix == self._scenario.correct_fix
    self._fix_was_correct = hit
    delta = 3.0 if hit else -2.0
    self._record_step(delta, {"type": "apply_fix", "fix": fix, "correct": hit})

    left = MAX_STEPS - self._step_count
    if hit:
        return (
            f"[FIX APPLIED: {fix}] Fix applied successfully. Systems recovering.\n"
            f"Now submit your diagnosis with submit_diagnosis().\n\n"
            f"[Steps remaining: {left} | Reward: +3.0 | Cumulative: {self._cumulative_reward:.2f}]"
        )
    return (
        f"[FIX APPLIED: {fix}] Fix applied but the issue persists.\n"
        f"Consider your diagnosis carefully.\n\n"
        f"[Steps remaining: {left} | Reward: -2.0 | Cumulative: {self._cumulative_reward:.2f}]"
    )
|
| 224 |
+
|
| 225 |
+
def _do_submit(self, root_cause: str, fix: str, justification: str = "") -> str:
    """Score the final diagnosis and end the episode.

    Scoring: +8/-4 for the root cause, +8/-4 for the fix, +2 efficiency
    bonus when both are correct within 4 steps, +1 for a non-trivial
    justification. Invalid names are charged -2.0 without ending the episode.
    """
    err = self._check_episode()
    if err:
        return err

    if root_cause not in VALID_ROOT_CAUSES:
        self._record_step(-2.0, {"type": "invalid", "message": f"Invalid root_cause: {root_cause}"})
        return f"Invalid root_cause '{root_cause}'. Available: {sorted(VALID_ROOT_CAUSES)}. Penalty: -2.0"

    if fix not in VALID_FIXES:
        self._record_step(-2.0, {"type": "invalid", "message": f"Invalid fix: {fix}"})
        return f"Invalid fix '{fix}'. Available: {sorted(VALID_FIXES)}. Penalty: -2.0"

    self._done = True
    rc_correct = root_cause == self._scenario.root_cause
    fix_correct = fix == self._scenario.correct_fix
    # Justifications under 10 characters (after trimming) count as missing.
    has_justification = len(justification.strip()) >= 10

    reward = 0.0
    reward += 8.0 if rc_correct else -4.0
    reward += 8.0 if fix_correct else -4.0
    # _record_step below will increment _step_count; the `+ 1` anticipates
    # that, so this condition matches the post-increment `<= 4` check used
    # for the message further down.
    if rc_correct and fix_correct and self._step_count + 1 <= 4:
        reward += 2.0
    if has_justification:
        reward += 1.0

    self._record_step(reward, {
        "type": "submit", "root_cause": root_cause, "fix": fix,
        "justification": justification,
        "rc_correct": rc_correct, "fix_correct": fix_correct,
        "has_justification": has_justification,
    })

    # Human-readable scorecard returned to the agent.
    lines = ["[DIAGNOSIS SUBMITTED]"]
    lines.append(f" Root cause: {root_cause} — {'CORRECT' if rc_correct else 'WRONG (was: ' + self._scenario.root_cause + ')'}")
    lines.append(f" Fix: {fix} — {'CORRECT' if fix_correct else 'WRONG (was: ' + self._scenario.correct_fix + ')'}")
    if has_justification:
        lines.append(f" Justification: {justification.strip()}")
        lines.append(" JUSTIFICATION BONUS: +1")
    else:
        lines.append(" No justification provided (missed +1 bonus)")
    lines.append(f" Steps used: {self._step_count}/{MAX_STEPS}")
    if rc_correct and fix_correct and self._step_count <= 4:
        lines.append(" EFFICIENCY BONUS: +2 (solved in <= 4 steps)")
    lines.append(f" Episode reward: {self._cumulative_reward:.2f}")

    return "\n".join(lines)
|
| 272 |
+
|
| 273 |
+
# ------------------------------------------------------------------
|
| 274 |
+
# OpenEnv Environment interface (for training / WebSocket API)
|
| 275 |
+
# ------------------------------------------------------------------
|
| 276 |
+
|
| 277 |
+
def reset(self, seed=None, episode_id=None, **kwargs) -> StackDoctorObservation:
    """Start a new incident episode and return the initial observation.

    kwargs:
        scenario_id: optional explicit scenario to load.
        split: dataset split to draw from (default "train").
    """
    scenario_id = kwargs.get("scenario_id")
    split = kwargs.get("split", "train")
    self._scenario = get_scenario(scenario_id, split=split)

    # Fresh OpenEnv State + zeroed per-episode bookkeeping.
    self._state_obj = State(
        episode_id=episode_id or str(uuid4()),
        step_count=0,
    )
    self._step_count = 0
    self._fix_applied = False
    self._fix_was_correct = None
    self._done = False
    self._cumulative_reward = 0.0
    self._actions_taken = []

    # Flatten specialist opinion objects into plain dicts for the observation.
    specialist_obs = {}
    for name, op in self._scenario.specialist_opinions.items():
        specialist_obs[name] = {
            "opinion": op.opinion,
            "confidence": op.confidence,
        }

    return StackDoctorObservation(
        output=(
            "STACK DOCTOR — New incident assigned.\n"
            "Investigate using the available tools: read_log(), check_config(), "
            "view_code(), run_diagnostic(), query_specialist(name).\n"
            "When ready, apply_fix(fix) and/or submit_diagnosis(root_cause, fix).\n"
            "You have 6 steps. At least one specialist is WRONG — cross-verify.\n"
        ),
        incident_ticket=self._scenario.incident_ticket,
        hardware=self._scenario.hardware,
        model_name=self._scenario.model_name,
        backend=self._scenario.backend,
        log_excerpt=self._scenario.initial_log,
        code_snippet=self._scenario.initial_snippet,
        specialist_opinions=specialist_obs,
        steps_remaining=MAX_STEPS,
        fix_used=False,
        done=False,
        reward=0.0,
    )
|
| 320 |
+
|
| 321 |
+
def _step_impl(
    self,
    action: Action,
    timeout_s: Optional[float] = None,
    **kwargs: Any,
) -> Observation:
    """Handle non-MCP actions (JSON action strings for training).

    Parses the action's JSON payload, dispatches to the matching tool
    implementation, and reports the reward earned by THIS action as the
    change in cumulative reward across the call.

    Fix: the previous version re-derived the reward from the last entry of
    ``self._actions_taken``, duplicating every handler's scoring rules and
    reporting a *stale* reward whenever a handler bailed out early (e.g. the
    ``_check_episode`` gate) without recording a new action. Measuring the
    cumulative-reward delta is always consistent with what the handlers
    actually charged.
    """
    if not isinstance(action, StackDoctorAction):
        return self._make_obs("Invalid action type.", -2.0)

    try:
        parsed = json.loads(action.message)
    except (json.JSONDecodeError, TypeError):
        return self._make_obs(f"Invalid JSON: {action.message[:200]}", -2.0)

    action_type = parsed.get("type")

    # Snapshot before dispatch; the per-action reward is the delta.
    reward_before = self._cumulative_reward

    if action_type == "inspect":
        result = self._do_inspect(parsed.get("target", "logs"))
    elif action_type == "ask_specialist":
        result = self._do_ask_specialist(parsed.get("specialist", ""))
    elif action_type == "apply_fix":
        result = self._do_apply_fix(parsed.get("fix", ""))
    elif action_type == "submit":
        result = self._do_submit(parsed.get("root_cause", ""), parsed.get("fix", ""), parsed.get("justification", ""))
    else:
        self._record_step(-2.0, {"type": "invalid", "message": f"Unknown: {action_type}"})
        result = f"Unknown action type: {action_type}. Penalty: -2.0"

    last_reward = self._cumulative_reward - reward_before
    return self._make_obs(result, last_reward)
|
| 371 |
+
|
| 372 |
+
def _make_obs(self, output: str, reward: float) -> StackDoctorObservation:
    """Build a step observation from current episode state.

    Guards every scenario field because this can be called before reset()
    (e.g. on a malformed first action). log_excerpt / code_snippet /
    specialist_opinions are only populated by reset(); step observations
    carry their content in ``output``.
    """
    remaining = MAX_STEPS - self._step_count
    return StackDoctorObservation(
        output=output,
        incident_ticket=self._scenario.incident_ticket if self._scenario else "",
        hardware=self._scenario.hardware if self._scenario else "",
        model_name=self._scenario.model_name if self._scenario else "",
        backend=self._scenario.backend if self._scenario else "",
        log_excerpt="",
        code_snippet="",
        specialist_opinions={},
        steps_remaining=remaining,
        fix_used=self._fix_applied,
        done=self._done,
        reward=reward,
        metadata={
            "cumulative_reward": self._cumulative_reward,
            "step": self._step_count,
            "scenario_id": self._scenario.id if self._scenario else "",
        },
    )
|
| 393 |
+
|
| 394 |
+
@property
def state(self) -> State:
    """Expose the OpenEnv ``State`` (episode id + step count) for the server."""
    return self._state_obj
|
static/index.html
ADDED
|
@@ -0,0 +1,1566 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>Stack Doctor — Incident War Room</title>
|
| 7 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 8 |
+
<link href="https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@300;400;500;600&family=Outfit:wght@300;400;500;600;700&display=swap" rel="stylesheet">
|
| 9 |
+
<style>
|
| 10 |
+
*, *::before, *::after { margin: 0; padding: 0; box-sizing: border-box; }
|
| 11 |
+
|
| 12 |
+
:root {
|
| 13 |
+
--bg-abyss: #060a11;
|
| 14 |
+
--bg-deep: #0a0f1a;
|
| 15 |
+
--bg-mid: #0f1623;
|
| 16 |
+
--bg-surface: #151d2e;
|
| 17 |
+
--bg-elevated: #1a2438;
|
| 18 |
+
--border-subtle: rgba(100, 180, 255, 0.08);
|
| 19 |
+
--border-active: rgba(0, 196, 255, 0.25);
|
| 20 |
+
--cyan: #00c4ff;
|
| 21 |
+
--cyan-bright: #40d4ff;
|
| 22 |
+
--cyan-dim: rgba(0, 196, 255, 0.15);
|
| 23 |
+
--cyan-glow: rgba(0, 196, 255, 0.4);
|
| 24 |
+
--amber: #f0a030;
|
| 25 |
+
--amber-dim: rgba(240, 160, 48, 0.15);
|
| 26 |
+
--amber-glow: rgba(240, 160, 48, 0.4);
|
| 27 |
+
--emerald: #00e676;
|
| 28 |
+
--emerald-dim: rgba(0, 230, 118, 0.12);
|
| 29 |
+
--emerald-glow: rgba(0, 230, 118, 0.35);
|
| 30 |
+
--coral: #ff3d5a;
|
| 31 |
+
--coral-dim: rgba(255, 61, 90, 0.12);
|
| 32 |
+
--text-primary: #d8e0ec;
|
| 33 |
+
--text-secondary: rgba(216, 224, 236, 0.55);
|
| 34 |
+
--text-tertiary: rgba(216, 224, 236, 0.3);
|
| 35 |
+
--font-display: 'Outfit', system-ui, sans-serif;
|
| 36 |
+
--font-mono: 'IBM Plex Mono', 'SF Mono', monospace;
|
| 37 |
+
--ease-out-expo: cubic-bezier(0.16, 1, 0.3, 1);
|
| 38 |
+
--duration-slow: 800ms;
|
| 39 |
+
--duration-med: 400ms;
|
| 40 |
+
--duration-fast: 200ms;
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
+
html { height: 100%; }
|
| 44 |
+
body {
|
| 45 |
+
min-height: 100%;
|
| 46 |
+
background: var(--bg-abyss);
|
| 47 |
+
color: var(--text-primary);
|
| 48 |
+
font-family: var(--font-display);
|
| 49 |
+
-webkit-font-smoothing: antialiased;
|
| 50 |
+
overflow-y: auto;
|
| 51 |
+
overflow-x: hidden;
|
| 52 |
+
}
|
| 53 |
+
|
| 54 |
+
body::before {
|
| 55 |
+
content: '';
|
| 56 |
+
position: fixed; inset: 0;
|
| 57 |
+
background:
|
| 58 |
+
radial-gradient(ellipse 80% 60% at 20% 80%, rgba(0, 100, 180, 0.06) 0%, transparent 70%),
|
| 59 |
+
radial-gradient(ellipse 60% 50% at 80% 20%, rgba(0, 160, 255, 0.04) 0%, transparent 60%);
|
| 60 |
+
pointer-events: none; z-index: 0;
|
| 61 |
+
}
|
| 62 |
+
|
| 63 |
+
.grid-overlay {
|
| 64 |
+
position: fixed; inset: 0; z-index: 0; pointer-events: none;
|
| 65 |
+
background-image:
|
| 66 |
+
linear-gradient(rgba(100, 180, 255, 0.015) 1px, transparent 1px),
|
| 67 |
+
linear-gradient(90deg, rgba(100, 180, 255, 0.015) 1px, transparent 1px);
|
| 68 |
+
background-size: 60px 60px;
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
.app {
|
| 72 |
+
position: relative; z-index: 1;
|
| 73 |
+
max-width: 1400px;
|
| 74 |
+
margin: 0 auto;
|
| 75 |
+
padding: 20px 24px 40px;
|
| 76 |
+
display: flex; flex-direction: column; gap: 16px;
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
/* ══════════ HEADER ══════════ */
|
| 80 |
+
.header {
|
| 81 |
+
display: flex; align-items: center; justify-content: space-between;
|
| 82 |
+
padding: 14px 24px;
|
| 83 |
+
background: linear-gradient(135deg, var(--bg-mid), var(--bg-surface));
|
| 84 |
+
border: 1px solid var(--border-subtle); border-radius: 14px;
|
| 85 |
+
}
|
| 86 |
+
.header-left { display: flex; align-items: center; gap: 14px; }
|
| 87 |
+
.logo-mark {
|
| 88 |
+
width: 36px; height: 36px; border-radius: 9px;
|
| 89 |
+
background: linear-gradient(135deg, var(--cyan), rgba(0, 196, 255, 0.5));
|
| 90 |
+
display: flex; align-items: center; justify-content: center;
|
| 91 |
+
box-shadow: 0 0 24px var(--cyan-dim);
|
| 92 |
+
animation: logoPulse 4s ease-in-out infinite;
|
| 93 |
+
}
|
| 94 |
+
.logo-mark svg { width: 20px; height: 20px; }
|
| 95 |
+
@keyframes logoPulse { 0%,100% { box-shadow: 0 0 20px var(--cyan-dim); } 50% { box-shadow: 0 0 36px var(--cyan-glow); } }
|
| 96 |
+
.header-title { font-weight: 600; font-size: 16px; letter-spacing: 0.5px; }
|
| 97 |
+
.header-subtitle { font-family: var(--font-mono); font-size: 11px; font-weight: 300; color: var(--text-secondary); }
|
| 98 |
+
.header-right { display: flex; align-items: center; gap: 20px; }
|
| 99 |
+
.header-meta-label { font-family: var(--font-mono); font-size: 9px; font-weight: 500; letter-spacing: 1.5px; text-transform: uppercase; color: var(--text-tertiary); }
|
| 100 |
+
.header-meta-value { font-family: var(--font-mono); font-size: 12px; color: var(--cyan); }
|
| 101 |
+
.status-badge {
|
| 102 |
+
font-family: var(--font-mono); font-size: 10px; font-weight: 500;
|
| 103 |
+
letter-spacing: 1px; text-transform: uppercase;
|
| 104 |
+
padding: 5px 14px; border-radius: 20px;
|
| 105 |
+
background: var(--cyan-dim); color: var(--cyan);
|
| 106 |
+
border: 1px solid rgba(0, 196, 255, 0.2);
|
| 107 |
+
transition: all var(--duration-slow) var(--ease-out-expo);
|
| 108 |
+
}
|
| 109 |
+
.status-badge.warning { background: var(--amber-dim); color: var(--amber); border-color: rgba(240, 160, 48, 0.3); }
|
| 110 |
+
.status-badge.success { background: var(--emerald-dim); color: var(--emerald); border-color: rgba(0, 230, 118, 0.3); }
|
| 111 |
+
.status-badge.error { background: var(--coral-dim); color: var(--coral); border-color: rgba(255, 61, 90, 0.3); }
|
| 112 |
+
|
| 113 |
+
/* ══════════ SECTION TITLES ══════════ */
|
| 114 |
+
.section-title {
|
| 115 |
+
font-family: var(--font-mono); font-size: 10px; font-weight: 500;
|
| 116 |
+
letter-spacing: 2px; text-transform: uppercase; color: var(--text-tertiary);
|
| 117 |
+
padding: 8px 0 0;
|
| 118 |
+
display: flex; align-items: center; gap: 10px;
|
| 119 |
+
}
|
| 120 |
+
.section-title::after { content: ''; flex: 1; height: 1px; background: var(--border-subtle); }
|
| 121 |
+
|
| 122 |
+
/* ══════════ TRAINING CHART ══════════ */
|
| 123 |
+
.chart-section {
|
| 124 |
+
display: grid; grid-template-columns: 1fr 1fr; gap: 16px;
|
| 125 |
+
}
|
| 126 |
+
.chart-panel {
|
| 127 |
+
background: linear-gradient(180deg, rgba(15, 22, 35, 0.85), rgba(10, 15, 26, 0.95));
|
| 128 |
+
border: 1px solid var(--border-subtle); border-radius: 14px;
|
| 129 |
+
padding: 24px 28px; position: relative; overflow: hidden;
|
| 130 |
+
}
|
| 131 |
+
.chart-panel-title {
|
| 132 |
+
font-family: var(--font-mono); font-size: 10px; font-weight: 500;
|
| 133 |
+
letter-spacing: 1.5px; text-transform: uppercase; color: var(--text-secondary);
|
| 134 |
+
margin-bottom: 4px;
|
| 135 |
+
}
|
| 136 |
+
.chart-panel-subtitle {
|
| 137 |
+
font-size: 13px; font-weight: 300; color: var(--text-tertiary);
|
| 138 |
+
margin-bottom: 16px;
|
| 139 |
+
}
|
| 140 |
+
.chart-canvas-wrap {
|
| 141 |
+
position: relative; width: 100%; height: 280px;
|
| 142 |
+
}
|
| 143 |
+
canvas { width: 100% !important; height: 100% !important; }
|
| 144 |
+
|
| 145 |
+
.chart-stat-row {
|
| 146 |
+
display: flex; gap: 20px; margin-top: 16px; padding-top: 14px;
|
| 147 |
+
border-top: 1px solid var(--border-subtle);
|
| 148 |
+
}
|
| 149 |
+
.chart-stat { display: flex; flex-direction: column; gap: 2px; }
|
| 150 |
+
.chart-stat-label { font-family: var(--font-mono); font-size: 9px; font-weight: 500; letter-spacing: 1px; text-transform: uppercase; color: var(--text-tertiary); }
|
| 151 |
+
.chart-stat-value { font-family: var(--font-mono); font-size: 18px; font-weight: 300; }
|
| 152 |
+
.chart-stat-value.emerald { color: var(--emerald); }
|
| 153 |
+
.chart-stat-value.coral { color: var(--coral); }
|
| 154 |
+
.chart-stat-value.cyan { color: var(--cyan); }
|
| 155 |
+
.chart-stat-value.amber { color: var(--amber); }
|
| 156 |
+
|
| 157 |
+
/* ══════════ ANNOTATION BADGES ══════════ */
|
| 158 |
+
.annotation {
|
| 159 |
+
position: absolute; font-family: var(--font-mono); font-size: 9px;
|
| 160 |
+
font-weight: 500; letter-spacing: 0.5px; padding: 3px 8px;
|
| 161 |
+
border-radius: 4px; pointer-events: none; white-space: nowrap;
|
| 162 |
+
}
|
| 163 |
+
|
| 164 |
+
/* ══════════ WAR ROOM GRID ══════════ */
|
| 165 |
+
.warroom { display: grid; grid-template-columns: 220px 1fr 260px; gap: 14px; }
|
| 166 |
+
|
| 167 |
+
.panel {
|
| 168 |
+
background: linear-gradient(180deg, rgba(15, 22, 35, 0.85), rgba(10, 15, 26, 0.95));
|
| 169 |
+
border: 1px solid var(--border-subtle); border-radius: 14px; overflow: hidden;
|
| 170 |
+
}
|
| 171 |
+
.panel-header {
|
| 172 |
+
padding: 12px 16px 8px; display: flex; align-items: center; gap: 8px;
|
| 173 |
+
border-bottom: 1px solid var(--border-subtle);
|
| 174 |
+
}
|
| 175 |
+
.panel-header-dot {
|
| 176 |
+
width: 6px; height: 6px; border-radius: 50%;
|
| 177 |
+
background: var(--cyan); box-shadow: 0 0 8px var(--cyan-glow);
|
| 178 |
+
animation: dotPulse 3s ease-in-out infinite;
|
| 179 |
+
}
|
| 180 |
+
@keyframes dotPulse { 0%,100% { opacity: 0.6; } 50% { opacity: 1; } }
|
| 181 |
+
.panel-header-title { font-family: var(--font-mono); font-size: 10px; font-weight: 500; letter-spacing: 1.5px; text-transform: uppercase; color: var(--text-secondary); }
|
| 182 |
+
|
| 183 |
+
/* ══════════ ARCHITECTURE DIAGRAM ══════════ */
|
| 184 |
+
.arch-body {
|
| 185 |
+
display: flex; flex-direction: column; align-items: center;
|
| 186 |
+
padding: 16px 14px; gap: 0;
|
| 187 |
+
}
|
| 188 |
+
.arch-layer {
|
| 189 |
+
width: 100%; padding: 12px;
|
| 190 |
+
background: var(--bg-deep); border: 1px solid var(--border-subtle); border-radius: 8px;
|
| 191 |
+
transition: all var(--duration-slow) var(--ease-out-expo);
|
| 192 |
+
}
|
| 193 |
+
.arch-layer .layer-name { font-family: var(--font-mono); font-size: 10px; font-weight: 500; letter-spacing: 1.2px; text-transform: uppercase; color: var(--text-secondary); transition: color var(--duration-slow); }
|
| 194 |
+
.arch-layer .layer-detail { font-family: var(--font-mono); font-size: 9px; font-weight: 300; color: var(--text-tertiary); margin-top: 2px; }
|
| 195 |
+
.arch-layer.scanning { border-color: rgba(0, 196, 255, 0.3); background: linear-gradient(135deg, rgba(0, 196, 255, 0.06), var(--bg-deep)); box-shadow: 0 0 20px var(--cyan-dim); }
|
| 196 |
+
.arch-layer.scanning .layer-name { color: var(--cyan); }
|
| 197 |
+
.arch-layer.identified { border-color: rgba(240, 160, 48, 0.4); background: linear-gradient(135deg, rgba(240, 160, 48, 0.08), var(--bg-deep)); box-shadow: 0 0 25px var(--amber-dim); animation: identPulse 2s ease-in-out infinite; }
|
| 198 |
+
.arch-layer.identified .layer-name { color: var(--amber); }
|
| 199 |
+
@keyframes identPulse { 0%,100% { box-shadow: 0 0 20px var(--amber-dim); } 50% { box-shadow: 0 0 35px rgba(240, 160, 48, 0.25); } }
|
| 200 |
+
.arch-layer.resolved { border-color: rgba(0, 230, 118, 0.3); background: linear-gradient(135deg, rgba(0, 230, 118, 0.06), var(--bg-deep)); box-shadow: 0 0 20px var(--emerald-dim); }
|
| 201 |
+
.arch-layer.resolved .layer-name { color: var(--emerald); }
|
| 202 |
+
|
| 203 |
+
.arch-connector { width: 1px; height: 10px; background: linear-gradient(180deg, var(--border-active), transparent); position: relative; }
|
| 204 |
+
.arch-connector .data-dot { width: 3px; height: 3px; border-radius: 50%; background: var(--cyan); position: absolute; left: -1px; animation: flowDown 2s linear infinite; opacity: 0.6; }
|
| 205 |
+
@keyframes flowDown { 0% { top: 0; opacity: 0; } 20% { opacity: 0.8; } 80% { opacity: 0.8; } 100% { top: 100%; opacity: 0; } }
|
| 206 |
+
|
| 207 |
+
/* ══════════ INVESTIGATION LOG ══════════ */
|
| 208 |
+
.log-body {
|
| 209 |
+
height: 380px; overflow-y: auto; padding: 12px 16px;
|
| 210 |
+
display: flex; flex-direction: column; gap: 8px;
|
| 211 |
+
scrollbar-width: thin; scrollbar-color: rgba(100, 180, 255, 0.1) transparent;
|
| 212 |
+
}
|
| 213 |
+
.idle-prompt { display: flex; flex-direction: column; align-items: center; justify-content: center; height: 100%; gap: 12px; padding: 40px; }
|
| 214 |
+
.idle-prompt .idle-text { font-size: 13px; color: var(--text-secondary); text-align: center; line-height: 1.6; }
|
| 215 |
+
|
| 216 |
+
.incident-card {
|
| 217 |
+
background: linear-gradient(135deg, rgba(0, 196, 255, 0.04), var(--bg-surface));
|
| 218 |
+
border: 1px solid rgba(0, 196, 255, 0.12); border-radius: 10px;
|
| 219 |
+
padding: 14px; animation: cardIn 0.6s var(--ease-out-expo) both;
|
| 220 |
+
}
|
| 221 |
+
@keyframes cardIn { from { opacity: 0; transform: translateY(8px); } }
|
| 222 |
+
.incident-label { font-family: var(--font-mono); font-size: 9px; font-weight: 500; letter-spacing: 1.5px; text-transform: uppercase; color: var(--cyan); margin-bottom: 8px; }
|
| 223 |
+
.incident-text { font-size: 12px; line-height: 1.6; color: var(--text-primary); }
|
| 224 |
+
.incident-meta { display: flex; gap: 16px; margin-top: 10px; flex-wrap: wrap; }
|
| 225 |
+
.incident-meta-item .meta-label { font-family: var(--font-mono); font-size: 8px; font-weight: 500; letter-spacing: 1.2px; text-transform: uppercase; color: var(--text-tertiary); }
|
| 226 |
+
.incident-meta-item .meta-value { font-family: var(--font-mono); font-size: 11px; color: var(--text-secondary); }
|
| 227 |
+
|
| 228 |
+
.log-entry {
|
| 229 |
+
display: flex; gap: 10px; padding: 10px 12px;
|
| 230 |
+
background: var(--bg-deep); border: 1px solid var(--border-subtle); border-radius: 8px;
|
| 231 |
+
animation: entryIn 0.5s var(--ease-out-expo) both;
|
| 232 |
+
}
|
| 233 |
+
@keyframes entryIn { from { opacity: 0; transform: translateX(-12px); } }
|
| 234 |
+
|
| 235 |
+
.log-entry-icon {
|
| 236 |
+
width: 24px; height: 24px; border-radius: 6px;
|
| 237 |
+
display: flex; align-items: center; justify-content: center;
|
| 238 |
+
flex-shrink: 0; font-size: 11px;
|
| 239 |
+
}
|
| 240 |
+
.log-entry-icon.inspect { background: var(--cyan-dim); color: var(--cyan); }
|
| 241 |
+
.log-entry-icon.specialist { background: rgba(160, 120, 255, 0.12); color: #a078ff; }
|
| 242 |
+
.log-entry-icon.fix { background: var(--amber-dim); color: var(--amber); }
|
| 243 |
+
.log-entry-icon.submit { background: var(--emerald-dim); color: var(--emerald); }
|
| 244 |
+
.log-entry-content { flex: 1; min-width: 0; }
|
| 245 |
+
.log-entry-header { display: flex; align-items: center; justify-content: space-between; margin-bottom: 3px; }
|
| 246 |
+
.log-entry-type { font-family: var(--font-mono); font-size: 10px; font-weight: 500; letter-spacing: 0.8px; text-transform: uppercase; }
|
| 247 |
+
.log-entry-type.cyan { color: var(--cyan); }
|
| 248 |
+
.log-entry-type.purple { color: #a078ff; }
|
| 249 |
+
.log-entry-type.amber { color: var(--amber); }
|
| 250 |
+
.log-entry-type.emerald { color: var(--emerald); }
|
| 251 |
+
.log-entry-step { font-family: var(--font-mono); font-size: 9px; color: var(--text-tertiary); }
|
| 252 |
+
.log-entry-text { font-family: var(--font-mono); font-size: 10px; font-weight: 300; line-height: 1.55; color: var(--text-secondary); white-space: pre-wrap; word-break: break-word; }
|
| 253 |
+
.log-entry-reward { font-family: var(--font-mono); font-size: 10px; font-weight: 500; margin-top: 4px; }
|
| 254 |
+
.log-entry-reward.positive { color: var(--emerald); }
|
| 255 |
+
.log-entry-reward.negative { color: var(--coral); }
|
| 256 |
+
|
| 257 |
+
/* ══════════ SPECIALISTS ══════════ */
|
| 258 |
+
.specialists-body { padding: 10px 12px; display: flex; flex-direction: column; gap: 8px; overflow-y: auto; max-height: 420px; }
|
| 259 |
+
.specialist-card {
|
| 260 |
+
background: var(--bg-deep); border: 1px solid var(--border-subtle); border-radius: 10px;
|
| 261 |
+
padding: 10px 12px; transition: all var(--duration-slow) var(--ease-out-expo);
|
| 262 |
+
animation: cardIn 0.5s var(--ease-out-expo) both;
|
| 263 |
+
}
|
| 264 |
+
.specialist-card.highlighted { border-color: rgba(160, 120, 255, 0.3); background: linear-gradient(135deg, rgba(160, 120, 255, 0.05), var(--bg-deep)); }
|
| 265 |
+
.specialist-card.wrong { border-color: rgba(255, 61, 90, 0.15); opacity: 0.5; }
|
| 266 |
+
.specialist-card.correct { border-color: rgba(0, 230, 118, 0.2); }
|
| 267 |
+
.specialist-top { display: flex; align-items: center; justify-content: space-between; margin-bottom: 6px; }
|
| 268 |
+
.specialist-name { font-family: var(--font-mono); font-size: 10px; font-weight: 500; letter-spacing: 1px; text-transform: uppercase; color: var(--text-secondary); transition: color var(--duration-med); }
|
| 269 |
+
.specialist-card.highlighted .specialist-name { color: #a078ff; }
|
| 270 |
+
.specialist-card.correct .specialist-name { color: var(--emerald); }
|
| 271 |
+
.specialist-card.wrong .specialist-name { color: var(--coral); }
|
| 272 |
+
.confidence-bar { width: 50px; height: 3px; background: rgba(255,255,255,0.06); border-radius: 2px; overflow: hidden; }
|
| 273 |
+
.confidence-fill { height: 100%; border-radius: 2px; background: var(--cyan); transition: width 1s var(--ease-out-expo); }
|
| 274 |
+
.specialist-card.wrong .confidence-fill { background: var(--coral); }
|
| 275 |
+
.specialist-card.correct .confidence-fill { background: var(--emerald); }
|
| 276 |
+
.specialist-opinion { font-size: 11px; font-weight: 300; line-height: 1.4; color: var(--text-secondary); }
|
| 277 |
+
.specialist-card.wrong .specialist-opinion { opacity: 0.5; }
|
| 278 |
+
.specialist-verdict { font-family: var(--font-mono); font-size: 9px; font-weight: 500; letter-spacing: 1px; text-transform: uppercase; margin-top: 6px; opacity: 0; transition: opacity var(--duration-med); }
|
| 279 |
+
.specialist-card.wrong .specialist-verdict, .specialist-card.correct .specialist-verdict { opacity: 1; }
|
| 280 |
+
.specialist-card.wrong .specialist-verdict { color: var(--coral); }
|
| 281 |
+
.specialist-card.correct .specialist-verdict { color: var(--emerald); }
|
| 282 |
+
|
| 283 |
+
/* ══════════ VITALS BAR ══════════ */
|
| 284 |
+
.vitals-bar {
|
| 285 |
+
display: grid; grid-template-columns: repeat(5, 1fr); gap: 10px;
|
| 286 |
+
}
|
| 287 |
+
.vital {
|
| 288 |
+
background: linear-gradient(135deg, var(--bg-mid), var(--bg-surface));
|
| 289 |
+
border: 1px solid var(--border-subtle); border-radius: 10px;
|
| 290 |
+
padding: 10px 14px;
|
| 291 |
+
}
|
| 292 |
+
.vital-label { font-family: var(--font-mono); font-size: 9px; font-weight: 500; letter-spacing: 1.5px; text-transform: uppercase; color: var(--text-tertiary); margin-bottom: 4px; }
|
| 293 |
+
.vital-value { font-family: var(--font-mono); font-size: 20px; font-weight: 300; transition: color var(--duration-med); }
|
| 294 |
+
.vital-value.cyan { color: var(--cyan); }
|
| 295 |
+
.vital-value.amber { color: var(--amber); }
|
| 296 |
+
.vital-value.emerald { color: var(--emerald); }
|
| 297 |
+
.vital-value.coral { color: var(--coral); }
|
| 298 |
+
.steps-dots { display: flex; gap: 5px; margin-top: 4px; }
|
| 299 |
+
.step-dot { width: 8px; height: 8px; border-radius: 50%; background: rgba(255,255,255,0.08); border: 1px solid rgba(255,255,255,0.06); transition: all var(--duration-med); }
|
| 300 |
+
.step-dot.used { background: var(--cyan); border-color: var(--cyan); box-shadow: 0 0 8px var(--cyan-dim); }
|
| 301 |
+
.step-dot.current { background: var(--amber); border-color: var(--amber); box-shadow: 0 0 8px var(--amber-dim); animation: dotPulse 1.5s ease-in-out infinite; }
|
| 302 |
+
|
| 303 |
+
/* ══════════ CONTROLS ══════════ */
|
| 304 |
+
.controls {
|
| 305 |
+
display: flex; justify-content: center; gap: 12px; padding: 4px 0;
|
| 306 |
+
}
|
| 307 |
+
.ctrl-btn {
|
| 308 |
+
font-family: var(--font-mono); font-size: 12px; font-weight: 500;
|
| 309 |
+
letter-spacing: 1px; text-transform: uppercase;
|
| 310 |
+
padding: 12px 28px; border-radius: 10px;
|
| 311 |
+
cursor: pointer; transition: all var(--duration-fast) ease;
|
| 312 |
+
border: none;
|
| 313 |
+
}
|
| 314 |
+
.ctrl-btn.primary {
|
| 315 |
+
background: linear-gradient(135deg, var(--cyan), rgba(0, 160, 220, 0.9));
|
| 316 |
+
color: #fff; box-shadow: 0 0 24px var(--cyan-dim);
|
| 317 |
+
}
|
| 318 |
+
.ctrl-btn.primary:hover { box-shadow: 0 0 40px var(--cyan-glow); transform: translateY(-1px); }
|
| 319 |
+
.ctrl-btn.primary:disabled { opacity: 0.3; cursor: not-allowed; transform: none; box-shadow: none; }
|
| 320 |
+
.ctrl-btn.secondary {
|
| 321 |
+
background: var(--bg-surface); color: var(--text-secondary);
|
| 322 |
+
border: 1px solid var(--border-subtle);
|
| 323 |
+
}
|
| 324 |
+
.ctrl-btn.secondary:hover { border-color: var(--border-active); color: var(--text-primary); }
|
| 325 |
+
.server-input {
|
| 326 |
+
font-family: var(--font-mono); font-size: 11px; padding: 10px 14px;
|
| 327 |
+
background: var(--bg-deep); color: var(--cyan); border: 1px solid var(--border-subtle);
|
| 328 |
+
border-radius: 8px; width: 200px; outline: none;
|
| 329 |
+
}
|
| 330 |
+
.server-input:focus { border-color: var(--border-active); }
|
| 331 |
+
.conn-status {
|
| 332 |
+
font-family: var(--font-mono); font-size: 10px; text-transform: uppercase;
|
| 333 |
+
letter-spacing: 1px; color: var(--text-tertiary); padding: 0 8px;
|
| 334 |
+
}
|
| 335 |
+
.conn-status.connected { color: var(--emerald); }
|
| 336 |
+
.conn-status.error { color: var(--coral); }
|
| 337 |
+
.conn-status.running { color: var(--amber); }
|
| 338 |
+
|
| 339 |
+
/* ══════════ DIAGNOSIS OVERLAY ══════════ */
|
| 340 |
+
.diagnosis-overlay {
|
| 341 |
+
position: fixed; inset: 0;
|
| 342 |
+
background: rgba(6, 10, 17, 0.85); backdrop-filter: blur(20px);
|
| 343 |
+
display: flex; align-items: center; justify-content: center;
|
| 344 |
+
z-index: 100; opacity: 0; pointer-events: none;
|
| 345 |
+
transition: opacity 0.6s var(--ease-out-expo);
|
| 346 |
+
}
|
| 347 |
+
.diagnosis-overlay.visible { opacity: 1; pointer-events: auto; }
|
| 348 |
+
.diagnosis-card {
|
| 349 |
+
background: linear-gradient(180deg, var(--bg-surface), var(--bg-deep));
|
| 350 |
+
border: 1px solid var(--border-active); border-radius: 20px;
|
| 351 |
+
padding: 40px 48px; max-width: 520px; width: 100%; text-align: center;
|
| 352 |
+
transform: scale(0.92) translateY(20px);
|
| 353 |
+
transition: transform 0.8s var(--ease-out-expo);
|
| 354 |
+
box-shadow: 0 0 60px rgba(0, 196, 255, 0.08), 0 20px 60px rgba(0, 0, 0, 0.4);
|
| 355 |
+
}
|
| 356 |
+
.diagnosis-overlay.visible .diagnosis-card { transform: scale(1) translateY(0); }
|
| 357 |
+
.diagnosis-title { font-family: var(--font-mono); font-size: 11px; font-weight: 500; letter-spacing: 3px; text-transform: uppercase; color: var(--cyan); margin-bottom: 24px; }
|
| 358 |
+
.diagnosis-result { display: flex; flex-direction: column; gap: 12px; margin-bottom: 28px; }
|
| 359 |
+
.diagnosis-row { display: flex; align-items: center; justify-content: space-between; padding: 12px 16px; background: var(--bg-deep); border-radius: 10px; border: 1px solid var(--border-subtle); }
|
| 360 |
+
.diagnosis-row-label { font-family: var(--font-mono); font-size: 10px; font-weight: 500; letter-spacing: 1px; text-transform: uppercase; color: var(--text-tertiary); }
|
| 361 |
+
.diagnosis-row-value { font-family: var(--font-mono); font-size: 13px; }
|
| 362 |
+
.diagnosis-row-value.correct { color: var(--emerald); }
|
| 363 |
+
.diagnosis-row-value.wrong { color: var(--coral); }
|
| 364 |
+
.diagnosis-reward { font-size: 48px; font-weight: 700; letter-spacing: -2px; margin-bottom: 8px; }
|
| 365 |
+
.diagnosis-reward-label { font-family: var(--font-mono); font-size: 10px; letter-spacing: 1px; text-transform: uppercase; color: var(--text-tertiary); }
|
| 366 |
+
|
| 367 |
+
::-webkit-scrollbar { width: 4px; }
|
| 368 |
+
::-webkit-scrollbar-track { background: transparent; }
|
| 369 |
+
::-webkit-scrollbar-thumb { background: rgba(100, 180, 255, 0.15); border-radius: 4px; }
|
| 370 |
+
|
| 371 |
+
@media (max-width: 1000px) {
|
| 372 |
+
.warroom { grid-template-columns: 1fr; }
|
| 373 |
+
.chart-section { grid-template-columns: 1fr; }
|
| 374 |
+
.vitals-bar { grid-template-columns: repeat(3, 1fr); }
|
| 375 |
+
}
|
| 376 |
+
</style>
|
| 377 |
+
</head>
|
| 378 |
+
<body>
|
| 379 |
+
<div class="grid-overlay"></div>
|
| 380 |
+
|
| 381 |
+
<div class="app">
|
| 382 |
+
<header class="header">
|
| 383 |
+
<div class="header-left">
|
| 384 |
+
<div class="logo-mark">
|
| 385 |
+
<svg viewBox="0 0 24 24" fill="none" stroke="white" stroke-width="2" stroke-linecap="round">
|
| 386 |
+
<path d="M12 3v18M3 12h18M7 7l10 10M17 7L7 17"/>
|
| 387 |
+
</svg>
|
| 388 |
+
</div>
|
| 389 |
+
<div>
|
| 390 |
+
<div class="header-title">Stack Doctor</div>
|
| 391 |
+
<div class="header-subtitle">Incident War Room</div>
|
| 392 |
+
</div>
|
| 393 |
+
</div>
|
| 394 |
+
<div class="header-right">
|
| 395 |
+
<div>
|
| 396 |
+
<div class="header-meta-label">Scenario</div>
|
| 397 |
+
<div class="header-meta-value" id="scenarioId">—</div>
|
| 398 |
+
</div>
|
| 399 |
+
<div>
|
| 400 |
+
<div class="header-meta-label">Episode</div>
|
| 401 |
+
<div class="header-meta-value" id="episodeTime">00:00</div>
|
| 402 |
+
</div>
|
| 403 |
+
<div class="status-badge" id="statusBadge">Standby</div>
|
| 404 |
+
</div>
|
| 405 |
+
</header>
|
| 406 |
+
|
| 407 |
+
<!-- ══════════ TRAINING DATA ══════════ -->
|
| 408 |
+
<div class="section-title">Training Analytics</div>
|
| 409 |
+
<div class="chart-section" style="grid-template-columns: 1fr; max-width: 900px; margin: 0 auto; width: 100%;">
|
| 410 |
+
<div class="chart-panel">
|
| 411 |
+
<div class="chart-panel-title">Qwen3.5-9B — Episode Reward</div>
|
| 412 |
+
<div class="chart-panel-subtitle">100 GRPO steps — base model already near-oracle</div>
|
| 413 |
+
<div class="chart-canvas-wrap"><canvas id="rewardChart"></canvas></div>
|
| 414 |
+
<div class="chart-stat-row">
|
| 415 |
+
<div class="chart-stat">
|
| 416 |
+
<div class="chart-stat-label">Peak</div>
|
| 417 |
+
<div class="chart-stat-value emerald">+26.00</div>
|
| 418 |
+
</div>
|
| 419 |
+
<div class="chart-stat">
|
| 420 |
+
<div class="chart-stat-label">Base Avg</div>
|
| 421 |
+
<div class="chart-stat-value cyan">+19.50</div>
|
| 422 |
+
</div>
|
| 423 |
+
<div class="chart-stat">
|
| 424 |
+
<div class="chart-stat-label">Zero-Std</div>
|
| 425 |
+
<div class="chart-stat-value coral">72%</div>
|
| 426 |
+
</div>
|
| 427 |
+
</div>
|
| 428 |
+
</div>
|
| 429 |
+
<div class="chart-panel">
|
| 430 |
+
<div class="chart-panel-title">Qwen3.5-9B — Completion Length</div>
|
| 431 |
+
<div class="chart-panel-subtitle">Thinking mode consumed token budget, hit 2048 cap</div>
|
| 432 |
+
<div class="chart-canvas-wrap"><canvas id="lengthChart"></canvas></div>
|
| 433 |
+
<div class="chart-stat-row">
|
| 434 |
+
<div class="chart-stat">
|
| 435 |
+
<div class="chart-stat-label">Collapse</div>
|
| 436 |
+
<div class="chart-stat-value coral">Step 36</div>
|
| 437 |
+
</div>
|
| 438 |
+
<div class="chart-stat">
|
| 439 |
+
<div class="chart-stat-label">Clipping</div>
|
| 440 |
+
<div class="chart-stat-value amber">Step 69</div>
|
| 441 |
+
</div>
|
| 442 |
+
</div>
|
| 443 |
+
</div>
|
| 444 |
+
<div class="chart-panel" style="border-color: rgba(0, 196, 255, 0.15);">
|
| 445 |
+
<div class="chart-panel-title">Qwen2.5-1.5B — Episode Reward</div>
|
| 446 |
+
<div class="chart-panel-subtitle">16 GRPO steps — weak model, real gradient signal</div>
|
| 447 |
+
<div class="chart-canvas-wrap"><canvas id="reward1bChart"></canvas></div>
|
| 448 |
+
<div class="chart-stat-row">
|
| 449 |
+
<div class="chart-stat">
|
| 450 |
+
<div class="chart-stat-label">Best Step</div>
|
| 451 |
+
<div class="chart-stat-value cyan">-1.75</div>
|
| 452 |
+
</div>
|
| 453 |
+
<div class="chart-stat">
|
| 454 |
+
<div class="chart-stat-label">Avg</div>
|
| 455 |
+
<div class="chart-stat-value amber">-4.90</div>
|
| 456 |
+
</div>
|
| 457 |
+
<div class="chart-stat">
|
| 458 |
+
<div class="chart-stat-label">Zero-Std</div>
|
| 459 |
+
<div class="chart-stat-value emerald">0%</div>
|
| 460 |
+
</div>
|
| 461 |
+
</div>
|
| 462 |
+
</div>
|
| 463 |
+
</div>
|
| 464 |
+
|
| 465 |
+
<!-- ══════════ DEMO CONTROLS ══════════ -->
|
| 466 |
+
<div class="section-title">Live Environment</div>
|
| 467 |
+
<div class="controls">
|
| 468 |
+
<input type="text" id="serverUrl" class="server-input" placeholder="Server URL (empty = same origin)">
|
| 469 |
+
<script>
|
| 470 |
+
/* Auto-detect: use localhost:8000 for local dev, empty for HF Spaces */
|
| 471 |
+
if (location.hostname === 'localhost' || location.hostname === '127.0.0.1') {
|
| 472 |
+
document.getElementById('serverUrl').value = 'http://localhost:8000';
|
| 473 |
+
}
|
| 474 |
+
</script>
|
| 475 |
+
<button class="ctrl-btn primary" id="demoBtn" onclick="runComparison()">▶ Run Comparison (Base → GRPO Trained)</button>
|
| 476 |
+
<button class="ctrl-btn secondary" id="resetBtn" onclick="resetState()">↺ Reset</button>
|
| 477 |
+
<span id="modelStatus" class="conn-status" style="margin-left:8px;">Model: checking...</span>
|
| 478 |
+
<span id="connStatus" class="conn-status">Disconnected</span>
|
| 479 |
+
</div>
|
| 480 |
+
|
| 481 |
+
<!-- ══════════ WAR ROOM ══════════ -->
|
| 482 |
+
<div class="warroom">
|
| 483 |
+
<div class="panel">
|
| 484 |
+
<div class="panel-header">
|
| 485 |
+
<div class="panel-header-dot"></div>
|
| 486 |
+
<div class="panel-header-title">Inference Stack</div>
|
| 487 |
+
</div>
|
| 488 |
+
<div class="arch-body" id="archBody">
|
| 489 |
+
<div class="arch-layer" id="layer-model"><div class="layer-name">Model</div><div class="layer-detail" id="detail-model">—</div></div>
|
| 490 |
+
<div class="arch-connector"><div class="data-dot"></div></div>
|
| 491 |
+
<div class="arch-layer" id="layer-kernel"><div class="layer-name">Kernel</div><div class="layer-detail">Attention / GEMM</div></div>
|
| 492 |
+
<div class="arch-connector"><div class="data-dot" style="animation-delay:-0.5s"></div></div>
|
| 493 |
+
<div class="arch-layer" id="layer-backend"><div class="layer-name">Backend</div><div class="layer-detail" id="detail-backend">—</div></div>
|
| 494 |
+
<div class="arch-connector"><div class="data-dot" style="animation-delay:-1s"></div></div>
|
| 495 |
+
<div class="arch-layer" id="layer-runtime"><div class="layer-name">Runtime</div><div class="layer-detail">CUDA / ROCm</div></div>
|
| 496 |
+
<div class="arch-connector"><div class="data-dot" style="animation-delay:-1.5s"></div></div>
|
| 497 |
+
<div class="arch-layer" id="layer-memory"><div class="layer-name">Memory</div><div class="layer-detail">HBM / KV Cache</div></div>
|
| 498 |
+
<div class="arch-connector"><div class="data-dot" style="animation-delay:-2s"></div></div>
|
| 499 |
+
<div class="arch-layer" id="layer-driver"><div class="layer-name">Driver</div><div class="layer-detail" id="detail-driver">—</div></div>
|
| 500 |
+
</div>
|
| 501 |
+
</div>
|
| 502 |
+
|
| 503 |
+
<div class="panel">
|
| 504 |
+
<div class="panel-header">
|
| 505 |
+
<div class="panel-header-dot"></div>
|
| 506 |
+
<div class="panel-header-title">Investigation Log</div>
|
| 507 |
+
</div>
|
| 508 |
+
<div class="log-body" id="logBody">
|
| 509 |
+
<div class="idle-prompt" id="idlePrompt">
|
| 510 |
+
<div class="idle-text">Awaiting incident assignment.<br>Click <strong>Run Comparison</strong> above to start.</div>
|
| 511 |
+
</div>
|
| 512 |
+
</div>
|
| 513 |
+
</div>
|
| 514 |
+
|
| 515 |
+
<div class="panel">
|
| 516 |
+
<div class="panel-header">
|
| 517 |
+
<div class="panel-header-dot"></div>
|
| 518 |
+
<div class="panel-header-title">Specialist Agents</div>
|
| 519 |
+
</div>
|
| 520 |
+
<div class="specialists-body" id="specialistsBody"></div>
|
| 521 |
+
</div>
|
| 522 |
+
</div>
|
| 523 |
+
|
| 524 |
+
<!-- ══════════ VITALS ══════════ -->
|
| 525 |
+
<div class="vitals-bar">
|
| 526 |
+
<div class="vital">
|
| 527 |
+
<div class="vital-label">Steps</div>
|
| 528 |
+
<div class="steps-dots" id="stepsDots">
|
| 529 |
+
<div class="step-dot"></div><div class="step-dot"></div><div class="step-dot"></div>
|
| 530 |
+
<div class="step-dot"></div><div class="step-dot"></div><div class="step-dot"></div>
|
| 531 |
+
</div>
|
| 532 |
+
</div>
|
| 533 |
+
<div class="vital"><div class="vital-label">Reward</div><div class="vital-value" id="rewardValue">0.00</div></div>
|
| 534 |
+
<div class="vital"><div class="vital-label">Fix Status</div><div class="vital-value" id="fixStatus" style="font-size:13px">Not Applied</div></div>
|
| 535 |
+
<div class="vital"><div class="vital-label">Root Cause</div><div class="vital-value" id="rootCauseValue" style="font-size:13px">—</div></div>
|
| 536 |
+
<div class="vital"><div class="vital-label">Diagnosis</div><div class="vital-value" id="diagnosisValue" style="font-size:13px">Pending</div></div>
|
| 537 |
+
</div>
|
| 538 |
+
</div>
|
| 539 |
+
|
| 540 |
+
<div class="diagnosis-overlay" id="diagnosisOverlay">
|
| 541 |
+
<div class="diagnosis-card">
|
| 542 |
+
<div class="diagnosis-title">Diagnosis Submitted</div>
|
| 543 |
+
<div class="diagnosis-result" id="diagnosisResult"></div>
|
| 544 |
+
<div class="diagnosis-reward" id="diagnosisReward">+0.00</div>
|
| 545 |
+
<div class="diagnosis-reward-label">Episode Reward</div>
|
| 546 |
+
</div>
|
| 547 |
+
</div>
|
| 548 |
+
|
| 549 |
+
<script>
|
| 550 |
+
/* ═══════════════════════════════════════════════
|
| 551 |
+
TRAINING DATA — Qwen3.5-9B, 100 GRPO steps
|
| 552 |
+
═══════════════════════════════════════════════ */
|
| 553 |
+
var TRAIN_DATA = {
|
| 554 |
+
steps: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100],
|
| 555 |
+
reward: [9.65,19.75,7.25,24.25,23.00,22.75,2.62,22.75,23.00,24.50,16.00,23.00,24.50,7.25,24.62,26.00,26.00,26.00,22.50,14.12,-5.45,7.12,-2.60,-8.50,18.50,0.88,26.25,7.88,9.62,7.88,6.88,24.25,-6.25,-5.50,-1.88,-1.75,-1.75,-5.12,-2.62,-1.75,-1.75,-1.75,-2.62,-1.75,-1.75,-1.75,-1.75,-1.75,-1.75,-1.75,-5.12,-1.75,-1.75,-1.75,-1.75,-1.75,-1.75,-1.75,-5.12,-5.12,-1.75,-1.75,-1.75,-1.75,-8.50,-5.12,-8.50,-8.50,-8.50,-8.50,-8.50,-8.50,-8.50,-8.50,-8.50,-8.50,-8.50,-8.50,-8.50,-8.50,-8.50,-8.50,-8.50,-5.00,-8.50,-8.50,-8.50,-1.75,-5.12,-1.75,-1.75,-1.75,-5.12,-1.75,-1.75,-0.12,-1.75,-2.62,-1.75,-1.75],
|
| 556 |
+
completion_length: [68,98,91,96,85,92,96,74,99,71,86,116,112,124,126,175,105,120,152,112,148,190,193,182,164,135,140,218,130,152,164,182,134,93,24,15,18,87,16,16,15,15,15,15,13,15,15,16,15,15,125,62,180,876,376,280,883,484,734,470,432,488,354,177,471,607,248,210,1234,3,2048,2048,2048,2048,2048,1024,1026,1078,2048,1025,14,1030,2048,1025,2048,2048,2048,1364,1032,15,15,15,14,15,15,1032,2048,1065,2048,1058],
|
| 557 |
+
};
|
| 558 |
+
|
| 559 |
+
/* Qwen2.5-1.5B — 16 steps before crash (fixed in next run) */
|
| 560 |
+
var TRAIN_DATA_1B = {
|
| 561 |
+
steps: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],
|
| 562 |
+
reward: [-5.625,-3.375,-8.375,-7.75,-2.75,-2.625,-3.0,-5.125,-8.875,-3.0,-1.75,-3.875,-9.5,-5.5,-3.125,-6.125],
|
| 563 |
+
completion_length: [65.5,57.5,53,36.5,81,98,89,41.5,46,62.5,75,72.5,48,52.5,44,60],
|
| 564 |
+
};
|
| 565 |
+
|
| 566 |
+
/* ═══════════════════════════════════════════════
|
| 567 |
+
CHARTS — lightweight canvas rendering
|
| 568 |
+
═══════════════════════════════════════════════ */
|
| 569 |
+
/**
 * Draw a single line chart (grid, axis labels, optional zero line,
 * reference lines, shaded regions, gradient stroke, area fill, end dot).
 *
 * @param {string}   canvasId  id of the target <canvas>; its parent element defines the size.
 * @param {number[]} data      y-values, one per training step.
 * @param {Object}   opts      minY/maxY, zeroLine, refLines, regions, color, areaColor,
 *                             gradientStops, stepsArray (x labels; defaults to TRAIN_DATA.steps).
 */
function drawChart(canvasId, data, opts) {
  var canvas = document.getElementById(canvasId);
  var dpr = window.devicePixelRatio || 1;
  var rect = canvas.parentElement.getBoundingClientRect();
  // Assigning width/height resets the 2D context state, so the ctx.scale
  // below never compounds across re-renders (e.g. on window resize).
  canvas.width = rect.width * dpr;
  canvas.height = rect.height * dpr;
  canvas.style.width = rect.width + 'px';
  canvas.style.height = rect.height + 'px';
  var ctx = canvas.getContext('2d');
  ctx.scale(dpr, dpr);
  var W = rect.width, H = rect.height;
  var pad = { top: 28, right: 24, bottom: 40, left: 64 };
  var plotW = W - pad.left - pad.right;
  var plotH = H - pad.top - pad.bottom;

  var minY = opts.minY !== undefined ? opts.minY : Math.min.apply(null, data);
  var maxY = opts.maxY !== undefined ? opts.maxY : Math.max.apply(null, data);
  var rangeY = maxY - minY || 1;

  // FIX: guard the x-step denominator — a single-point series would divide
  // by zero (data.length - 1 === 0) and produce NaN coordinates.
  var xDenom = Math.max(data.length - 1, 1);
  function xPos(i) { return pad.left + (i / xDenom) * plotW; }
  function yPos(v) { return pad.top + plotH - ((v - minY) / rangeY) * plotH; }

  // Horizontal grid lines + y-axis labels
  ctx.strokeStyle = 'rgba(100,180,255,0.06)';
  ctx.lineWidth = 1;
  var gridCount = 5;
  for (var g = 0; g <= gridCount; g++) {
    var gy = pad.top + (g / gridCount) * plotH;
    ctx.beginPath(); ctx.moveTo(pad.left, gy); ctx.lineTo(W - pad.right, gy); ctx.stroke();
    var label = (maxY - (g / gridCount) * rangeY).toFixed(0);
    ctx.fillStyle = 'rgba(216,224,236,0.25)';
    ctx.font = '10px IBM Plex Mono';
    ctx.textAlign = 'right';
    ctx.fillText(label, pad.left - 12, gy + 4);
  }

  // X axis labels (sparser ticks for longer series)
  var stepsArr = opts.stepsArray || TRAIN_DATA.steps;
  var xInterval = data.length > 30 ? 20 : data.length > 10 ? 5 : 2;
  ctx.fillStyle = 'rgba(216,224,236,0.25)';
  ctx.textAlign = 'center';
  for (var x = 0; x < data.length; x += xInterval) {
    ctx.fillText(stepsArr[x], xPos(x), H - 8);
  }
  ctx.fillText(stepsArr[data.length - 1], xPos(data.length - 1), H - 8);

  // Dashed zero line for reward charts
  if (opts.zeroLine) {
    var zy = yPos(0);
    if (zy >= pad.top && zy <= pad.top + plotH) {
      ctx.strokeStyle = 'rgba(255,255,255,0.12)';
      ctx.setLineDash([4, 4]);
      ctx.beginPath(); ctx.moveTo(pad.left, zy); ctx.lineTo(W - pad.right, zy); ctx.stroke();
      ctx.setLineDash([]);
    }
  }

  // Reference lines (baselines) — drawn before data so they appear underneath
  if (opts.refLines) {
    opts.refLines.forEach(function(ref) {
      var ry = yPos(ref.value);
      if (ry >= pad.top - 5 && ry <= pad.top + plotH + 5) {
        ctx.strokeStyle = ref.color || 'rgba(255,255,255,0.2)';
        ctx.lineWidth = 1.5;
        ctx.setLineDash(ref.dash || [6, 4]);
        ctx.beginPath(); ctx.moveTo(pad.left, ry); ctx.lineTo(W - pad.right, ry); ctx.stroke();
        ctx.setLineDash([]);
        // Label on an opaque pill so the data line never covers it
        var labelY = ref.labelBelow ? ry + 24 : ry - 16;
        ctx.font = '600 9px IBM Plex Mono';
        var textWidth = ctx.measureText(ref.label).width;
        ctx.fillStyle = 'rgba(6, 10, 17, 0.85)';
        ctx.beginPath();
        // FIX: CanvasRenderingContext2D.roundRect is not available in older
        // browsers — fall back to a plain rect so the chart still renders.
        if (typeof ctx.roundRect === 'function') {
          ctx.roundRect(W - pad.right - textWidth - 16, labelY - 9, textWidth + 12, 14, 3);
        } else {
          ctx.rect(W - pad.right - textWidth - 16, labelY - 9, textWidth + 12, 14);
        }
        ctx.fill();
        ctx.fillStyle = ref.color || 'rgba(255,255,255,0.3)';
        ctx.textAlign = 'right';
        ctx.fillText(ref.label, W - pad.right - 8, labelY);
      }
    });
  }

  // Shaded step regions with captions above the plot
  if (opts.regions) {
    opts.regions.forEach(function(r) {
      var x1 = xPos(r.from);
      var x2 = xPos(r.to);
      ctx.fillStyle = r.color;
      ctx.fillRect(x1, pad.top, x2 - x1, plotH);
      ctx.fillStyle = r.labelColor || 'rgba(216,224,236,0.35)';
      ctx.font = '600 8px IBM Plex Mono';
      ctx.textAlign = 'center';
      ctx.fillText(r.label, (x1 + x2) / 2, pad.top - 6);
    });
  }

  // Horizontal gradient for the stroke (solid color unless stops provided)
  var grad = ctx.createLinearGradient(pad.left, 0, W - pad.right, 0);
  if (opts.gradientStops) {
    opts.gradientStops.forEach(function(s) { grad.addColorStop(s[0], s[1]); });
  } else {
    grad.addColorStop(0, opts.color || '#00c4ff');
    grad.addColorStop(1, opts.color || '#00c4ff');
  }

  // Area fill under the line
  ctx.beginPath();
  ctx.moveTo(xPos(0), yPos(data[0]));
  for (var i = 1; i < data.length; i++) ctx.lineTo(xPos(i), yPos(data[i]));
  ctx.lineTo(xPos(data.length - 1), yPos(minY));
  ctx.lineTo(xPos(0), yPos(minY));
  ctx.closePath();
  var areaGrad = ctx.createLinearGradient(0, pad.top, 0, pad.top + plotH);
  areaGrad.addColorStop(0, (opts.areaColor || 'rgba(0,196,255,0.12)'));
  areaGrad.addColorStop(1, 'rgba(0,196,255,0)');
  ctx.fillStyle = areaGrad;
  ctx.fill();

  // Data line
  ctx.strokeStyle = grad;
  ctx.lineWidth = 2;
  ctx.lineJoin = 'round';
  ctx.beginPath();
  ctx.moveTo(xPos(0), yPos(data[0]));
  for (var j = 1; j < data.length; j++) ctx.lineTo(xPos(j), yPos(data[j]));
  ctx.stroke();

  // Dot on the last data point
  var lastX = xPos(data.length - 1), lastY = yPos(data[data.length - 1]);
  ctx.fillStyle = opts.color || '#00c4ff';
  ctx.beginPath(); ctx.arc(lastX, lastY, 3, 0, Math.PI * 2); ctx.fill();
}
|
| 703 |
+
|
| 704 |
+
/** Redraw all three training charts (invoked on load and on window resize). */
function renderCharts() {
  var specs = [
    ['rewardChart', TRAIN_DATA.reward, {
      minY: -12, maxY: 30, zeroLine: true,
      gradientStops: [[0, '#00e676'], [0.18, '#00e676'], [0.22, '#f0a030'], [0.35, '#ff3d5a'], [1, '#ff3d5a']],
      areaColor: 'rgba(0,196,255,0.08)',
      regions: [
        { from: 0, to: 19, label: 'BASE MODEL', color: 'rgba(0,230,118,0.04)', labelColor: 'rgba(0,230,118,0.5)' },
        { from: 20, to: 35, label: 'DEGRADATION', color: 'rgba(240,160,48,0.04)', labelColor: 'rgba(240,160,48,0.5)' },
        { from: 36, to: 69, label: 'COLLAPSE', color: 'rgba(255,61,90,0.04)', labelColor: 'rgba(255,61,90,0.5)' },
        { from: 70, to: 99, label: 'CLIPPING', color: 'rgba(255,61,90,0.03)', labelColor: 'rgba(255,61,90,0.4)' },
      ],
      refLines: [
        { value: 19.5, label: '9B BASELINE +19.5', color: 'rgba(0,230,118,0.5)', dash: [8, 4] },
      ],
    }],
    ['lengthChart', TRAIN_DATA.completion_length, {
      minY: 0, maxY: 2200,
      color: '#f0a030',
      areaColor: 'rgba(240,160,48,0.08)',
      regions: [
        { from: 0, to: 35, label: 'NORMAL', color: 'rgba(0,230,118,0.03)', labelColor: 'rgba(0,230,118,0.4)' },
        { from: 36, to: 68, label: 'SHORT OUTPUT', color: 'rgba(240,160,48,0.04)', labelColor: 'rgba(240,160,48,0.5)' },
        { from: 69, to: 99, label: 'HIT CAP (2048)', color: 'rgba(255,61,90,0.04)', labelColor: 'rgba(255,61,90,0.5)' },
      ],
    }],
    ['reward1bChart', TRAIN_DATA_1B.reward, {
      minY: -12, maxY: 22, zeroLine: true,
      stepsArray: TRAIN_DATA_1B.steps,
      color: '#00c4ff',
      areaColor: 'rgba(0,196,255,0.1)',
      gradientStops: [[0, '#ff3d5a'], [0.4, '#f0a030'], [0.7, '#00c4ff'], [1, '#00c4ff']],
      refLines: [
        { value: 19.5, label: '9B BASELINE +19.5', color: 'rgba(0,230,118,0.4)', dash: [8, 4] },
        { value: -4.9, label: '1.5B BASELINE -4.9', color: 'rgba(240,160,48,0.5)', dash: [4, 4], labelBelow: true },
      ],
    }],
  ];
  // Draw in declaration order: 9B reward, 9B completion length, 1.5B reward.
  specs.forEach(function (spec) { drawChart(spec[0], spec[1], spec[2]); });
}
|
| 743 |
+
|
| 744 |
+
window.addEventListener('resize', renderCharts);
|
| 745 |
+
setTimeout(renderCharts, 100);
|
| 746 |
+
|
| 747 |
+
/* ═══════════════════════════════════════════════
|
| 748 |
+
WAR ROOM ENGINE
|
| 749 |
+
═══════════════════════════════════════════════ */
|
| 750 |
+
var ACTION_ICONS = { inspect: '\u2315', ask_specialist: '\u25C9', apply_fix: '\u26A1', submit: '\u2713', reward_breakdown: '\u2261' };
|
| 751 |
+
var state = { step: 0, reward: 0, fixApplied: false, done: false, startTime: null, timerInterval: null };
|
| 752 |
+
|
| 753 |
+
/** Start (or restart) the mm:ss episode clock shown in the header. */
function startTimer() {
  // FIX: clear any interval that is already running, so calling
  // startTimer() twice without stopTimer() cannot leak an interval.
  if (state.timerInterval) clearInterval(state.timerInterval);
  state.startTime = Date.now();
  state.timerInterval = setInterval(function () {
    var elapsed = Math.floor((Date.now() - state.startTime) / 1000);
    document.getElementById('episodeTime').textContent =
      String(Math.floor(elapsed / 60)).padStart(2, '0') + ':' +
      String(elapsed % 60).padStart(2, '0');
  }, 1000);
}
|
| 760 |
+
/** Stop the episode clock, nulling the handle so a stale id is never cleared twice. */
function stopTimer() {
  if (state.timerInterval) {
    clearInterval(state.timerInterval);
    state.timerInterval = null;
  }
}
|
| 761 |
+
|
| 762 |
+
/** Repaint the step dots: dots before `step` are used, the dot at `step` is current (unless the episode is done). */
function updateSteps(step) {
  var dots = document.querySelectorAll('.step-dot');
  for (var i = 0; i < dots.length; i++) {
    var dot = dots[i];
    dot.className = 'step-dot';
    if (i < step) {
      dot.classList.add('used');
    } else if (i === step && !state.done) {
      dot.classList.add('current');
    }
  }
}
|
| 769 |
+
|
| 770 |
+
/** Add `delta` to the running episode reward and re-render the vitals readout. */
function updateReward(delta) {
  state.reward += delta;
  var positive = state.reward >= 0;
  var display = document.getElementById('rewardValue');
  display.textContent = (positive ? '+' : '') + state.reward.toFixed(2);
  display.className = positive ? 'vital-value emerald' : 'vital-value coral';
}
|
| 776 |
+
|
| 777 |
+
/** Set the modifier class of one architecture layer; an empty cls resets it. */
function setLayerState(id, cls) {
  var layer = document.getElementById('layer-' + id);
  if (!layer) return;
  layer.className = cls ? 'arch-layer ' + cls : 'arch-layer';
}
|
| 781 |
+
/** Reset every architecture layer back to its base class. */
function clearAllLayers() {
  var layers = document.querySelectorAll('.arch-layer');
  for (var i = 0; i < layers.length; i++) {
    layers[i].className = 'arch-layer';
  }
}
|
| 782 |
+
/** Update the header status badge's text and optional modifier class. */
function setStatus(text, cls) {
  var badge = document.getElementById('statusBadge');
  badge.textContent = text;
  badge.className = cls ? 'status-badge ' + cls : 'status-badge';
}
|
| 783 |
+
|
| 784 |
+
/**
 * Append one row to the investigation log (icon + header + text, plus an
 * optional reward chip) and scroll the log to the bottom.
 * `opts`: { type, label, text, reward? }.
 */
function addLogEntry(opts) {
  var idle = document.getElementById('idlePrompt');
  if (idle) idle.remove();

  // Tiny element factory to keep the DOM assembly readable.
  function el(tag, className, text) {
    var node = document.createElement(tag);
    node.className = className;
    if (text !== undefined) node.textContent = text;
    return node;
  }

  var iconCls = opts.type === 'ask_specialist' ? 'specialist' : opts.type;
  var typeCls =
    opts.type === 'inspect' ? 'cyan' :
    opts.type === 'ask_specialist' ? 'purple' :
    opts.type === 'apply_fix' ? 'amber' : 'emerald';

  var entry = el('div', 'log-entry');
  var icon = el('div', 'log-entry-icon ' + iconCls, ACTION_ICONS[opts.type] || '\u2022');
  var content = el('div', 'log-entry-content');

  var header = el('div', 'log-entry-header');
  header.appendChild(el('div', 'log-entry-type ' + typeCls, opts.label));
  header.appendChild(el('div', 'log-entry-step', 'Step ' + state.step + '/6'));
  content.appendChild(header);
  content.appendChild(el('div', 'log-entry-text', opts.text));

  if (opts.reward !== undefined) {
    var rewardText = (opts.reward >= 0 ? '+' : '') + opts.reward.toFixed(2);
    var rewardCls = 'log-entry-reward ' + (opts.reward >= 0 ? 'positive' : 'negative');
    content.appendChild(el('div', rewardCls, rewardText));
  }

  entry.appendChild(icon);
  entry.appendChild(content);
  var body = document.getElementById('logBody');
  body.appendChild(entry);
  body.scrollTop = body.scrollHeight;
}
|
| 805 |
+
|
| 806 |
+
/** Render the scenario's incident ticket plus hardware/model/backend metadata into the log. */
function addIncidentCard(sc) {
  var idle = document.getElementById('idlePrompt');
  if (idle) idle.remove();

  // Tiny element factory to keep the DOM assembly readable.
  function el(tag, className, text) {
    var node = document.createElement(tag);
    node.className = className;
    if (text !== undefined) node.textContent = text;
    return node;
  }

  var card = el('div', 'incident-card');
  card.appendChild(el('div', 'incident-label', 'Incident Ticket'));
  card.appendChild(el('div', 'incident-text', sc.incident_ticket));

  var meta = el('div', 'incident-meta');
  [['Hardware', sc.hardware], ['Model', sc.model_name], ['Backend', sc.backend]]
    .forEach(function (field) {
      var item = el('div', 'incident-meta-item');
      item.appendChild(el('div', 'meta-label', field[0]));
      item.appendChild(el('div', 'meta-value', field[1]));
      meta.appendChild(item);
    });
  card.appendChild(meta);

  document.getElementById('logBody').appendChild(card);
}
|
| 821 |
+
|
| 822 |
+
/** Build one card per specialist (name, confidence bar, opinion, empty verdict slot). */
function populateSpecialists(ops) {
  var body = document.getElementById('specialistsBody');
  body.textContent = '';
  Object.keys(ops).forEach(function (name, index) {
    var data = ops[name];

    var card = document.createElement('div');
    card.className = 'specialist-card';
    card.id = 'specialist-' + name;
    // Stagger the entrance animation per card.
    card.style.animationDelay = (index * 0.1) + 's';

    var top = document.createElement('div');
    top.className = 'specialist-top';
    var nameEl = document.createElement('div');
    nameEl.className = 'specialist-name';
    nameEl.textContent = name;
    var bar = document.createElement('div');
    bar.className = 'confidence-bar';
    var fill = document.createElement('div');
    fill.className = 'confidence-fill';
    fill.style.width = (data.confidence * 100) + '%';
    bar.appendChild(fill);
    top.appendChild(nameEl);
    top.appendChild(bar);

    var opinion = document.createElement('div');
    opinion.className = 'specialist-opinion';
    opinion.textContent = data.opinion;
    var verdict = document.createElement('div');
    verdict.className = 'specialist-verdict';

    card.appendChild(top);
    card.appendChild(opinion);
    card.appendChild(verdict);
    body.appendChild(card);
  });
}
|
| 839 |
+
|
| 840 |
+
/** Highlight exactly one specialist card, clearing any previous highlight. */
function highlightSpecialist(name) {
  document.querySelectorAll('.specialist-card').forEach(function (card) {
    card.classList.remove('highlighted');
  });
  var target = document.getElementById('specialist-' + name);
  if (target) target.classList.add('highlighted');
}
|
| 844 |
+
/** Replace a card's highlight with its final correct/wrong verdict. */
function markSpecialist(name, correct) {
  var card = document.getElementById('specialist-' + name);
  if (!card) return;
  card.classList.remove('highlighted');
  card.classList.add(correct ? 'correct' : 'wrong');
  var verdict = card.querySelector('.specialist-verdict');
  if (verdict) {
    verdict.textContent = correct ? '\u2713 Helpful' : '\u2014 Not Relevant';
  }
}
|
| 849 |
+
|
| 850 |
+
/**
 * Fill and reveal the end-of-episode diagnosis overlay.
 * `d`: { rootCause, rcCorrect, correctRc, fix, fixCorrect, correctFix, totalReward }.
 */
function showDiagnosis(d) {
  var overlay = document.getElementById('diagnosisOverlay');
  var result = document.getElementById('diagnosisResult');
  result.textContent = '';

  // Append one label/value row to the result panel; valueClass is optional.
  function addRow(label, value, valueClass) {
    var row = document.createElement('div');
    row.className = 'diagnosis-row';
    var lbl = document.createElement('div');
    lbl.className = 'diagnosis-row-label';
    lbl.textContent = label;
    var val = document.createElement('div');
    val.className = valueClass ? 'diagnosis-row-value ' + valueClass : 'diagnosis-row-value';
    val.textContent = value;
    row.appendChild(lbl);
    row.appendChild(val);
    result.appendChild(row);
  }

  // Root-cause and fix rows: wrong answers show the expected value after an arrow.
  [['Root Cause', d.rootCause, d.rcCorrect, d.correctRc],
   ['Fix', d.fix, d.fixCorrect, d.correctFix]].forEach(function (r) {
    var suffix = r[2] ? ' \u2713' : ' \u2717 \u2192 ' + r[3];
    addRow(r[0], r[1] + suffix, r[2] ? 'correct' : 'wrong');
  });

  addRow('Steps Used', state.step + ' / 6');
  var elapsed = state.startTime ? Math.round((Date.now() - state.startTime) / 1000) : 0;
  addRow('Time', elapsed + 's');

  var rewardEl = document.getElementById('diagnosisReward');
  rewardEl.textContent = (d.totalReward >= 0 ? '+' : '') + d.totalReward.toFixed(2);
  rewardEl.style.color = d.totalReward >= 0 ? 'var(--emerald)' : 'var(--coral)';
  overlay.classList.add('visible');
}
|
| 877 |
+
|
| 878 |
+
/** Return the whole war room UI and episode state to its pre-episode baseline. */
function resetState() {
  // FIX: stop the timer BEFORE reassigning `state`. The original reassigned
  // state first, so stopTimer() read the fresh null handle and the old
  // interval kept running forever (interval leak on every reset).
  stopTimer();
  state = { step: 0, reward: 0, fixApplied: false, done: false, startTime: null, timerInterval: null };

  var logBody = document.getElementById('logBody');
  logBody.textContent = '';
  var idle = document.createElement('div');
  idle.className = 'idle-prompt';
  idle.id = 'idlePrompt';
  var txt = document.createElement('div');
  txt.className = 'idle-text';
  // FIX: '\n' inside textContent does not render as a line break in HTML
  // (the initial markup uses <br>); insert an explicit <br> element between
  // the two sentences instead.
  'Awaiting incident assignment.\nConnect to a live Stack Doctor environment above.'
    .split('\n')
    .forEach(function (line, i) {
      if (i > 0) txt.appendChild(document.createElement('br'));
      txt.appendChild(document.createTextNode(line));
    });
  idle.appendChild(txt);
  logBody.appendChild(idle);

  document.getElementById('specialistsBody').textContent = '';
  var reward = document.getElementById('rewardValue');
  reward.textContent = '0.00';
  reward.className = 'vital-value';
  var fix = document.getElementById('fixStatus');
  fix.textContent = 'Not Applied';
  fix.className = 'vital-value';
  var rootCause = document.getElementById('rootCauseValue');
  rootCause.textContent = '\u2014';
  rootCause.className = 'vital-value';
  var diagnosis = document.getElementById('diagnosisValue');
  diagnosis.textContent = 'Pending';
  diagnosis.className = 'vital-value';
  document.getElementById('scenarioId').textContent = '\u2014';
  document.getElementById('episodeTime').textContent = '00:00';
  document.getElementById('diagnosisOverlay').classList.remove('visible');
  setStatus('Standby', '');
  clearAllLayers();
  updateSteps(0);
  setConnStatus('Disconnected', '');
}
|
| 897 |
+
|
| 898 |
+
/* ═══════════════════════════════════════════════
|
| 899 |
+
DEMO — arch_guard_01
|
| 900 |
+
═══════════════════════════════════════════════ */
|
| 901 |
+
/* Canned scenario used by the offline fallback when no live environment is
   reachable. Mirrors the observation shape of a real episode (ticket,
   hardware context, specialist opinions, and pre-baked inspection texts). */
var DEMO = {
// Ground truth used to grade the scripted demo runs.
id: 'arch_guard_01', root_cause: 'arch_guard', correct_fix: 'relax_arch_check',
incident_ticket: "FlashInfer attention kernel fails to launch on newly provisioned DGX Spark nodes. Error: 'Unsupported GPU architecture sm_121'. Identical model config works on H100 nodes.",
hardware: 'NVIDIA SM121 (DGX Spark)', model_name: 'DeepSeek-V3-671B', backend: 'FlashInfer 0.4',
// Advisory opinions; dispatch/kernel point at the true cause, and the
// offline runs mark them as the "correct" specialists via markSpecialist.
specialist_opinions: {
runtime: { opinion: "CUDA runtime loaded successfully. No runtime issues detected.", confidence: 0.85 },
dispatch: { opinion: "Architecture check is blocking kernel dispatch. SM121 is not in the supported set despite being SM90-compatible.", confidence: 0.92 },
kernel: { opinion: "HMMA m16n8k16 instructions available on SM121. Capability check issue.", confidence: 0.88 },
loader: { opinion: "Model weights loaded correctly. Weight layout is standard.", confidence: 0.80 },
},
// Texts shown when the scripted "trained" run inspects logs/config or
// queries the dispatch specialist.
inspect_logs: "[FlashInfer] GPU: NVIDIA GH200 (sm_121)\n[FlashInfer] is_supported_arch(121) = False\n[FlashInfer] Architecture check FAILED\n[CUDA] All CUDA operations nominal\n[System] GPU memory: 96GB available",
inspect_config: "gpu_architecture: sm_121\ncuda_version: 13.0\nflashinfer_version: 0.4.1\nsupported_archs: [70, 75, 80, 86, 89, 90]",
followup_dispatch: "The dispatch table maps arch -> kernel. SM121 has no entry. Adding sm_12x family to the arch check should resolve this.",
};
|
| 915 |
+
|
| 916 |
+
/* Promise-based delay: resolves (with no value) after `ms` milliseconds. */
function sleep(ms) {
  return new Promise(function (resolve) {
    setTimeout(resolve, ms);
  });
}
|
| 917 |
+
|
| 918 |
+
/* ══════════════════════════════════════════════
|
| 919 |
+
LIVE ENVIRONMENT CONNECTION
|
| 920 |
+
══════════════════════════════════════════════ */
|
| 921 |
+
/* Live-connection state: `url` is an optional base-URL override and `ws`
   is the active WebSocket (null whenever disconnected). */
var SERVER = { url: '', ws: null };
|
| 922 |
+
|
| 923 |
+
/* Read the server URL from the input field, stripping any trailing slash. */
function getServerUrl() {
  var raw = document.getElementById('serverUrl').value;
  return raw.replace(/\/$/, '');
}
|
| 924 |
+
/* Derive the WebSocket endpoint from the configured server URL.
   With no URL configured, fall back to same-origin `/ws`, matching the
   page's own scheme (wss: under https:, ws: otherwise). */
function getWsUrl() {
  var base = getServerUrl();
  if (base) {
    return base.replace(/^http/, 'ws') + '/ws';
  }
  var scheme = location.protocol === 'https:' ? 'wss:' : 'ws:';
  return scheme + '//' + location.host + '/ws';
}
|
| 933 |
+
/* Update the connection badge: set its text and optional modifier class
   (always keeping the base 'conn-status' class). */
function setConnStatus(text, cls) {
  var badge = document.getElementById('connStatus');
  badge.textContent = text;
  badge.className = cls ? 'conn-status ' + cls : 'conn-status';
}
|
| 937 |
+
|
| 938 |
+
/* WebSocket-based communication — maintains session state across reset/step */
|
| 939 |
+
/* Open a WebSocket to the environment. Resolves with the socket once the
   connection is established; rejects on a connection error. On close the
   shared SERVER.ws handle is cleared so callers reconnect lazily. */
function wsConnect() {
  return new Promise(function (resolve, reject) {
    var sock = new WebSocket(getWsUrl());
    sock.onopen = function () {
      SERVER.ws = sock;
      resolve(sock);
    };
    sock.onerror = function () {
      reject(new Error('WebSocket connection failed'));
    };
    sock.onclose = function () {
      SERVER.ws = null;
    };
  });
}
|
| 948 |
+
|
| 949 |
+
/* Send one {type, data} frame and resolve with the next parsed reply.
   NOTE: the reply handler is installed per call (onmessage is overwritten),
   so this protocol assumes one request in flight at a time. Server-side
   "error" replies still resolve — the caller inspects the message type. */
function wsSend(type, data) {
  return new Promise(function (resolve, reject) {
    var sock = SERVER.ws;
    if (!sock || sock.readyState !== WebSocket.OPEN) {
      reject(new Error('WebSocket not connected'));
      return;
    }
    sock.onmessage = function (evt) {
      var parsed;
      try {
        parsed = JSON.parse(evt.data);
      } catch (err) {
        reject(new Error('Bad JSON from server'));
        return;
      }
      resolve(parsed); /* Always resolve — caller handles errors */
    };
    sock.send(JSON.stringify({ type: type, data: data || {} }));
  });
}
|
| 964 |
+
|
| 965 |
+
/* Close and forget the active WebSocket, if any. Safe to call repeatedly. */
function wsClose() {
  var sock = SERVER.ws;
  if (sock) {
    SERVER.ws = null;
    sock.close();
  }
}
|
| 968 |
+
|
| 969 |
+
/* Convenience wrappers — returns {observation, reward, done} from data envelope */
|
| 970 |
+
/* Reset the environment over the WebSocket session, connecting lazily if
   needed. Returns the data envelope {observation, reward, done}; throws
   when the server replies with an error frame. */
async function serverReset(body) {
  if (!SERVER.ws) {
    await wsConnect();
  }
  var reply = await wsSend('reset', body || {});
  if (reply.type === 'error') {
    var detail = (reply.data && reply.data.message) || 'Reset failed';
    throw new Error(detail);
  }
  return reply.data; /* {observation, reward, done} */
}
|
| 976 |
+
|
| 977 |
+
/* Execute one environment step over the WebSocket session.
   WS step format: {type: "step", data: {message: "..."}} — NOT wrapped in
   an action envelope. Environment validation errors (invalid target,
   specialist, etc.) are converted into a synthetic -2.0 penalty step so
   callers never have to catch here. */
async function serverStep(actionMessage) {
  var reply = await wsSend('step', { message: actionMessage });
  if (reply.type !== 'error') {
    return reply.data; /* {observation, reward, done} */
  }
  var detail = (reply.data && reply.data.message) || 'Unknown error';
  return { observation: { output: 'Error: ' + detail }, reward: -2.0, done: false };
}
|
| 987 |
+
|
| 988 |
+
/* Grey out the demo button and show the in-progress label. */
function disableButtons() {
  var demoBtn = document.getElementById('demoBtn');
  demoBtn.textContent = '\u25CF Running...';
  demoBtn.disabled = true;
}
|
| 992 |
+
/* Re-enable the demo button and restore its idle label. */
function enableButtons() {
  var demoBtn = document.getElementById('demoBtn');
  demoBtn.textContent = '\u25B6 Run Comparison (Untrained \u2192 Trained)';
  demoBtn.disabled = false;
}
|
| 996 |
+
|
| 997 |
+
/* Map root_cause to a layer name for the architecture diagram.
   Values match the layer names passed to setLayerState elsewhere in this
   file ('backend', 'runtime', 'driver', 'model', 'memory', 'kernel'). */
var CAUSE_TO_LAYER = {
arch_guard: 'backend', backend_whitelist: 'backend', backend_selector: 'backend',
runtime_loader: 'runtime', driver_compat: 'driver',
model_config: 'model', weight_layout: 'model',
memory_oom: 'memory', quantization_error: 'kernel',
distributed_comm: 'runtime'
};
|
| 1005 |
+
|
| 1006 |
+
/* Parse the environment observation to extract structured info */
|
| 1007 |
+
/* Normalize a raw environment observation into the fields the UI reads.
   Missing string fields default to '', reward defaults to 0, and the
   pass-through fields (remaining/fixUsed/done) keep whatever value —
   including undefined — the observation carried. */
function parseObs(obs) {
  var str = function (v) { return v || ''; };
  var parsed = {
    output: str(obs.output),
    ticket: str(obs.incident_ticket),
    hardware: str(obs.hardware),
    model: str(obs.model_name),
    backend: str(obs.backend),
    log: str(obs.log_excerpt),
    snippet: str(obs.code_snippet),
    specialists: obs.specialist_opinions || {},
    remaining: obs.steps_remaining,
    fixUsed: obs.fix_used,
    done: obs.done,
    reward: obs.reward || 0,
    meta: obs.metadata || {}
  };
  return parsed;
}
|
| 1024 |
+
|
| 1025 |
+
/* Drive one live episode end-to-end: connect + reset over WebSocket,
   render the incident, delegate to the untrained/trained runner, and fall
   back to the canned offline demo on any connection/protocol failure. */
async function runLive(mode) {
disableButtons(); resetState(); setConnStatus('Connecting...', 'running');

try {
// Step 0: Reset — get a real scenario via WebSocket (stateful session)
var resetResp = await serverReset({});
// Some servers return the observation bare, others inside an envelope.
var obs = parseObs(resetResp.observation || resetResp);
setConnStatus('Connected', 'connected');
startTimer();
setStatus('Incident Received', 'warning');

// Extract scenario ID from metadata
var scenarioId = (obs.meta && obs.meta.scenario_id) || 'unknown';
document.getElementById('scenarioId').textContent = scenarioId;
document.getElementById('detail-model').textContent = obs.model;
document.getElementById('detail-backend').textContent = obs.backend;
document.getElementById('detail-driver').textContent = obs.hardware;

// Populate incident card from real data
addIncidentCard({
incident_ticket: obs.ticket,
hardware: obs.hardware, model_name: obs.model, backend: obs.backend
});
populateSpecialists(obs.specialists);
await sleep(1800);

// Hand off to the appropriate agent behavior for this mode.
if (mode === 'untrained') {
await runLiveUntrained(obs);
} else {
await runLiveTrained(obs);
}

// Leave the diagnosis overlay visible briefly, then dismiss it.
await sleep(6000);
document.getElementById('diagnosisOverlay').classList.remove('visible');
} catch (e) {
// Any failure drops us into the offline scripted demo instead.
console.error('Live connection failed:', e);
addLogEntry({ type: 'submit', label: 'CONNECTION ERROR', reward: 0, text: e.message + '\n' + (e.stack || '') });
wsClose();
setConnStatus('Offline mode', 'running');
await runOffline(mode);
await sleep(6000);
document.getElementById('diagnosisOverlay').classList.remove('visible');
}
wsClose();
enableButtons();
}
|
| 1071 |
+
|
| 1072 |
+
/* ══════════════════════════════════════════════
|
| 1073 |
+
OFFLINE FALLBACK (no server needed)
|
| 1074 |
+
══════════════════════════════════════════════ */
|
| 1075 |
+
/* Offline fallback: replay the canned DEMO scenario with no server,
   dispatching to the scripted untrained or trained sequence. */
async function runOffline(mode) {
  var scenario = DEMO;
  startTimer();
  setStatus('Incident Received', 'warning');

  document.getElementById('scenarioId').textContent = scenario.id;
  document.getElementById('detail-model').textContent = scenario.model_name;
  document.getElementById('detail-backend').textContent = scenario.backend;
  document.getElementById('detail-driver').textContent = scenario.hardware;

  addIncidentCard(scenario);
  populateSpecialists(scenario.specialist_opinions);
  await sleep(1800);

  if (mode === 'untrained') {
    await runOfflineUntrained(scenario);
  } else {
    await runOfflineTrained(scenario);
  }
}
|
| 1088 |
+
|
| 1089 |
+
/* Scripted "untrained" episode for the offline demo: the model submits a
   blind, wrong diagnosis with no investigation and collects a large
   hard-coded penalty. `sc` is the DEMO scenario object. */
async function runOfflineUntrained(sc) {
state.step = 1; updateSteps(1); setStatus('Model Acting', 'warning');
addLogEntry({ type: 'submit', label: 'Blind Submit (no investigation)', reward: 0,
text: 'Model skips investigation.\nOutput: [{"type":"submit","root_cause":"runtime_loader","fix":"fix_runtime_path","justification":"maybe"}]' });
await sleep(1800);
// Show the (wrong) guess in the vitals panel and on the diagram.
document.getElementById('rootCauseValue').textContent = 'runtime_loader';
document.getElementById('rootCauseValue').className = 'vital-value coral';
setLayerState('runtime', 'identified');
// Hard-coded total matching the itemized breakdown below (-8 -2 -1 -0.5).
var totalReward = -11.5;
addLogEntry({ type: 'reward_breakdown', label: 'Reward Breakdown', reward: totalReward,
text: 'Root cause: runtime_loader \u2717 (expected arch_guard) \u2192 -8.0\nFix: fix_runtime_path \u2717 (expected relax_arch_check) \u2192 -2.0\nNo investigation \u2192 -1.0\nJustification too short \u2192 -0.5' });
updateReward(totalReward);
state.done = true; setStatus('Diagnosis Submitted', 'error');
document.getElementById('diagnosisValue').textContent = '\u2717 Incorrect';
document.getElementById('diagnosisValue').className = 'vital-value coral';
// Flag which specialists were right (dispatch/kernel) vs wrong.
markSpecialist('runtime', false); markSpecialist('dispatch', true);
markSpecialist('kernel', true); markSpecialist('loader', false);
await sleep(1500); stopTimer();
showDiagnosis({ rcCorrect: false, fixCorrect: false, rootCause: 'runtime_loader', fix: 'fix_runtime_path',
correctRc: 'arch_guard', correctFix: 'relax_arch_check', totalReward: state.reward });
}
|
| 1110 |
+
|
| 1111 |
+
/* Scripted "trained" episode for the offline demo: inspect logs and
   config (-0.25 each), query the dispatch specialist, apply the correct
   fix, then submit the correct diagnosis. `sc` is the DEMO scenario. */
async function runOfflineTrained(sc) {
// Step 1 — read logs.
state.step = 1; updateSteps(1); setStatus('Investigating', 'warning');
setLayerState('runtime', 'scanning');
addLogEntry({ type: 'inspect', label: 'Inspect Logs', reward: -0.25, text: sc.inspect_logs });
updateReward(-0.25); await sleep(2200);

// Step 2 — read config.
state.step = 2; updateSteps(2);
setLayerState('runtime', ''); setLayerState('backend', 'scanning');
addLogEntry({ type: 'inspect', label: 'Inspect Config', reward: -0.25, text: sc.inspect_config });
updateReward(-0.25); await sleep(2000);

// Step 3 — cross-check with the dispatch specialist; surface the cause.
state.step = 3; updateSteps(3);
setLayerState('backend', 'identified'); highlightSpecialist('dispatch');
addLogEntry({ type: 'ask_specialist', label: 'Query: Dispatch', reward: -0.25, text: sc.followup_dispatch });
updateReward(-0.25);
document.getElementById('rootCauseValue').textContent = 'arch_guard';
document.getElementById('rootCauseValue').className = 'vital-value amber';
await sleep(2200);

// Step 4 — apply the correct fix; cascade all layers to "resolved".
state.step = 4; updateSteps(4); setStatus('Applying Fix', 'warning');
state.fixApplied = true; setLayerState('backend', 'resolved');
addLogEntry({ type: 'apply_fix', label: 'Apply Fix: relax_arch_check', reward: 3.0, text: 'Fix applied successfully. Systems recovering.' });
updateReward(3.0);
document.getElementById('fixStatus').textContent = '\u2713 Applied';
document.getElementById('fixStatus').className = 'vital-value emerald';
['backend', 'runtime', 'model', 'memory', 'driver'].forEach(function(l, i) {
setTimeout(function() { setLayerState(l, 'resolved'); }, i * 300);
}); await sleep(2000);

// Step 5 — submit the correct diagnosis for the big reward.
state.step = 5; updateSteps(5); state.done = true;
setStatus('Diagnosis Submitted', 'success');
addLogEntry({ type: 'submit', label: 'Submit Diagnosis', reward: 19.0,
text: 'Root cause: arch_guard \u2713\nFix: relax_arch_check \u2713\nJustification: Logs show sm_121 rejected by arch check. Dispatch confirmed SM121 supports HMMA. Config missing sm_12x in supported_archs.' });
updateReward(19.0);
document.getElementById('diagnosisValue').textContent = '\u2713 Correct';
document.getElementById('diagnosisValue').className = 'vital-value emerald';
markSpecialist('runtime', false); markSpecialist('dispatch', true);
markSpecialist('kernel', true); markSpecialist('loader', false);
await sleep(1500); stopTimer();
showDiagnosis({ rcCorrect: true, fixCorrect: true, rootCause: 'arch_guard', fix: 'relax_arch_check',
correctRc: 'arch_guard', correctFix: 'relax_arch_check', totalReward: state.reward });
}
|
| 1153 |
+
|
| 1154 |
+
/* ── UNTRAINED: blind submit, no investigation ── */
|
| 1155 |
+
async function runLiveUntrained(initObs) {
|
| 1156 |
+
setStatus('Model Acting', 'warning');
|
| 1157 |
+
state.step = 1; updateSteps(1);
|
| 1158 |
+
|
| 1159 |
+
// Untrained model skips all investigation — just submits a random wrong guess
|
| 1160 |
+
addLogEntry({ type: 'inspect', label: 'Untrained Model Behavior', reward: 0,
|
| 1161 |
+
text: 'Model receives incident but skips investigation.\nNo logs read. No config checked. No specialists queried.\nImmediately submits a blind guess...' });
|
| 1162 |
+
await sleep(2000);
|
| 1163 |
+
|
| 1164 |
+
// Send a deliberately wrong submit to the real environment
|
| 1165 |
+
state.step = 2; updateSteps(2);
|
| 1166 |
+
var stepResp = await serverStep('{"type":"submit","root_cause":"runtime_loader","fix":"fix_runtime_path","justification":"idk"}');
|
| 1167 |
+
var obs = parseObs(stepResp.observation || stepResp);
|
| 1168 |
+
var stepReward = stepResp.reward !== undefined ? stepResp.reward : obs.reward;
|
| 1169 |
+
setConnStatus('Connected', 'connected');
|
| 1170 |
+
|
| 1171 |
+
// Parse the real environment response
|
| 1172 |
+
var outputText = obs.output;
|
| 1173 |
+
var rcCorrect = outputText.indexOf('CORRECT') !== -1 && outputText.indexOf('Root cause') !== -1
|
| 1174 |
+
&& outputText.split('Root cause')[1].split('\n')[0].indexOf('CORRECT') !== -1;
|
| 1175 |
+
var fixCorrect = outputText.indexOf('CORRECT') !== -1 && outputText.indexOf('Fix:') !== -1
|
| 1176 |
+
&& outputText.split('Fix:')[1].split('\n')[0].indexOf('CORRECT') !== -1;
|
| 1177 |
+
|
| 1178 |
+
// Extract actual correct answers from output
|
| 1179 |
+
var correctRc = ''; var correctFix = '';
|
| 1180 |
+
var rcMatch = outputText.match(/WRONG \(was: (\w+)\)/);
|
| 1181 |
+
if (rcMatch) correctRc = rcMatch[1];
|
| 1182 |
+
var fixMatch = outputText.match(/Fix:.*WRONG \(was: (\w+)\)/);
|
| 1183 |
+
if (fixMatch) correctFix = fixMatch[1];
|
| 1184 |
+
|
| 1185 |
+
document.getElementById('rootCauseValue').textContent = 'runtime_loader';
|
| 1186 |
+
document.getElementById('rootCauseValue').className = 'vital-value coral';
|
| 1187 |
+
|
| 1188 |
+
addLogEntry({ type: 'submit', label: 'Blind Submit', reward: stepReward,
|
| 1189 |
+
text: outputText });
|
| 1190 |
+
updateReward(stepReward);
|
| 1191 |
+
|
| 1192 |
+
state.done = true;
|
| 1193 |
+
setStatus('Diagnosis Submitted', obs.reward >= 0 ? 'success' : 'error');
|
| 1194 |
+
document.getElementById('diagnosisValue').textContent = rcCorrect ? '\u2713 Correct' : '\u2717 Incorrect';
|
| 1195 |
+
document.getElementById('diagnosisValue').className = 'vital-value ' + (rcCorrect ? 'emerald' : 'coral');
|
| 1196 |
+
|
| 1197 |
+
await sleep(1500); stopTimer();
|
| 1198 |
+
showDiagnosis({
|
| 1199 |
+
rcCorrect: rcCorrect, fixCorrect: fixCorrect,
|
| 1200 |
+
rootCause: 'runtime_loader', fix: 'fix_runtime_path',
|
| 1201 |
+
correctRc: correctRc || 'unknown', correctFix: correctFix || 'unknown',
|
| 1202 |
+
totalReward: state.reward
|
| 1203 |
+
});
|
| 1204 |
+
}
|
| 1205 |
+
|
| 1206 |
+
/* ── TRAINED: investigate, then diagnose ── */
|
| 1207 |
+
/* Plays a scripted "trained" investigation against the live environment:
   inspect logs, inspect config, query the dispatch specialist, then infer
   a root cause from the gathered text, apply its fix, and submit — all
   via real serverStep calls, rendering each real reward. */
async function runLiveTrained(initObs) {
setStatus('Investigating', 'warning');

// Step 1: Inspect logs
state.step = 1; updateSteps(1);
setLayerState('runtime', 'scanning');
var step1 = await serverStep('{"type":"inspect","target":"logs"}');
var obs1 = parseObs(step1.observation || step1);
// Envelope reward preferred; fall back to the observation's own field.
var rew1 = step1.reward !== undefined ? step1.reward : obs1.reward;
addLogEntry({ type: 'inspect', label: 'Inspect Logs', reward: rew1, text: obs1.output });
updateReward(rew1);
await sleep(2200);

// Step 2: Inspect config
state.step = 2; updateSteps(2);
setLayerState('runtime', ''); setLayerState('backend', 'scanning');
var step2 = await serverStep('{"type":"inspect","target":"config"}');
var obs2 = parseObs(step2.observation || step2);
var rew2 = step2.reward !== undefined ? step2.reward : obs2.reward;
addLogEntry({ type: 'inspect', label: 'Inspect Config', reward: rew2, text: obs2.output });
updateReward(rew2);
await sleep(2000);

// Step 3: Query a specialist — pick dispatch as a reasonable investigation choice
state.step = 3; updateSteps(3);
setLayerState('backend', 'identified');
highlightSpecialist('dispatch');
var step3 = await serverStep('{"type":"ask_specialist","specialist":"dispatch"}');
var obs3 = parseObs(step3.observation || step3);
var rew3 = step3.reward !== undefined ? step3.reward : obs3.reward;
addLogEntry({ type: 'ask_specialist', label: 'Query: Dispatch', reward: rew3, text: obs3.output });
updateReward(rew3);
await sleep(2200);

// Step 4: Smart submit — analyze the logs/config to guess the right answer
// For the demo, we use the scenario hints from the environment output to make an informed guess
// A real trained model would parse the observations and infer the root cause
state.step = 4; updateSteps(4); setStatus('Diagnosing', 'warning');

// Attempt to extract the root cause from clues in the observations
// NOTE(review): inferRootCause and RC_TO_FIX are defined elsewhere in this file.
var allText = obs1.output + ' ' + obs2.output + ' ' + obs3.output;
var guessRc = inferRootCause(allText);
var guessFix = RC_TO_FIX[guessRc] || 'switch_backend';
var justification = 'Logs and config analysis indicates ' + guessRc + '. Dispatch specialist confirmed. Applying ' + guessFix + '.';

document.getElementById('rootCauseValue').textContent = guessRc;
document.getElementById('rootCauseValue').className = 'vital-value amber';

// Apply fix first
var step4 = await serverStep(JSON.stringify({ type: 'apply_fix', fix: guessFix }));
var obs4 = parseObs(step4.observation || step4);
var rew4 = step4.reward !== undefined ? step4.reward : obs4.reward;
// A positive reward is treated as "the fix took".
var fixWorked = rew4 > 0;
addLogEntry({ type: 'apply_fix', label: 'Apply Fix: ' + guessFix, reward: rew4, text: obs4.output });
updateReward(rew4);
document.getElementById('fixStatus').textContent = fixWorked ? '\u2713 Applied' : '\u2717 Failed';
document.getElementById('fixStatus').className = 'vital-value ' + (fixWorked ? 'emerald' : 'coral');

// Cascade the diagram layers to "resolved" only when the fix worked.
if (fixWorked) {
var layers = ['backend', 'runtime', 'model', 'memory', 'driver'];
layers.forEach(function(l, i) { setTimeout(function() { setLayerState(l, 'resolved'); }, i * 300); });
}
await sleep(2000);

// Step 5: Submit diagnosis (only if the episode isn't already over)
var isDone4 = step4.done !== undefined ? step4.done : obs4.done;
if (!isDone4) {
state.step = 5; updateSteps(5);
var step5 = await serverStep(JSON.stringify({ type: 'submit', root_cause: guessRc, fix: guessFix, justification: justification }));
var obs5 = parseObs(step5.observation || step5);
var rew5 = step5.reward !== undefined ? step5.reward : obs5.reward;
addLogEntry({ type: 'submit', label: 'Submit Diagnosis', reward: rew5, text: obs5.output });
updateReward(rew5);

// The verdict line after "Root cause" / "Fix:" says CORRECT or WRONG.
var outputText = obs5.output;
var rcCorrect = outputText.indexOf('Root cause') !== -1 && outputText.split('Root cause')[1].split('\n')[0].indexOf('CORRECT') !== -1;
var fixCorrect2 = outputText.indexOf('Fix:') !== -1 && outputText.split('Fix:')[1].split('\n')[0].indexOf('CORRECT') !== -1;

state.done = true;
setStatus('Diagnosis Submitted', rcCorrect ? 'success' : 'error');
document.getElementById('diagnosisValue').textContent = rcCorrect ? '\u2713 Correct' : '\u2717 Incorrect';
document.getElementById('diagnosisValue').className = 'vital-value ' + (rcCorrect ? 'emerald' : 'coral');

// Recover the true answers from "WRONG (was: ...)" markers when we missed.
var correctRc = guessRc; var correctFix = guessFix;
var rcWrong = outputText.match(/WRONG \(was: (\w+)\)/);
if (rcWrong) correctRc = rcWrong[1];
var fixWrong = outputText.match(/Fix:.*WRONG \(was: (\w+)\)/);
if (fixWrong) correctFix = fixWrong[1];

await sleep(1500); stopTimer();
showDiagnosis({
rcCorrect: rcCorrect && !rcWrong, fixCorrect: fixCorrect2 && !fixWrong,
rootCause: guessRc, fix: guessFix,
correctRc: correctRc, correctFix: correctFix,
totalReward: state.reward
});
} else {
// Episode ended at the fix step; nothing left to submit.
state.done = true; stopTimer();
}
}
|
| 1307 |
+
|
| 1308 |
+
/* ══════════════════════════════════════════════
|
| 1309 |
+
LIVE MODEL INFERENCE — real Qwen 1.5B via MLX
|
| 1310 |
+
══════════════════════════════════════════════ */
|
| 1311 |
+
/* Inference URL: same origin on HF Spaces, localhost:8001 locally.
   An empty string makes the later fetch('/generate') calls same-origin. */
var INFERENCE_URL = (location.hostname === 'localhost' || location.hostname === '127.0.0.1') ? 'http://localhost:8001' : '';
|
| 1313 |
+
|
| 1314 |
+
/* Extract a JSON action array from raw model output.
 *
 * Strips <think>...</think> blocks, then tries (1) the outermost [...]
 * slice (tolerant of surrounding prose) and (2) the whole text as JSON.
 * Returns an array of plain action objects, or null if nothing parses.
 *
 * FIX: `typeof null === 'object'` in JS, so JSON nulls previously
 * survived the filter (and input "null" yielded [null]); callers then
 * crashed reading `action.type`. Nulls are now excluded everywhere.
 */
function extractActionsJS(text) {
  text = text.replace(/<think>[\s\S]*?<\/think>/g, '').trim();

  // Keep only real objects — explicitly excludes null.
  function onlyObjects(arr) {
    return arr.filter(function (x) { return x !== null && typeof x === 'object'; });
  }

  // 1) Outermost bracketed slice.
  var start = text.indexOf('['), end = text.lastIndexOf(']');
  if (start !== -1 && end > start) {
    try {
      var sliced = JSON.parse(text.slice(start, end + 1));
      if (Array.isArray(sliced)) return onlyObjects(sliced);
    } catch (e) {}
  }

  // 2) Whole text: either an array, or a single action object wrapped in one.
  try {
    var whole = JSON.parse(text);
    if (Array.isArray(whole)) return onlyObjects(whole);
    if (whole !== null && typeof whole === 'object') return [whole];
  } catch (e) {}

  return null;
}
|
| 1323 |
+
|
| 1324 |
+
/* System prompt for the base (untrained) model: bare action schema only,
   with no investigation methodology or worked example. */
var UNTRAINED_SYSTEM = 'You are Stack Doctor, an expert AI agent that diagnoses inference-stack incidents.\nYou receive an incident ticket with hardware/model/backend context, log excerpts, and specialist opinions.\nSome specialists may be wrong. Output a JSON array of actions:\n {"type":"inspect","target":"logs|config|snippet|metrics"}\n {"type":"ask_specialist","specialist":"runtime|dispatch|kernel|loader"}\n {"type":"apply_fix","fix":"<fix_name>"}\n {"type":"submit","root_cause":"<cause>","fix":"<fix>","justification":"<why>"}';
|
| 1325 |
+
|
| 1326 |
+
/* System prompt for the trained (GRPO) model: adds an explicit
   investigate-then-submit methodology, the full fix and root-cause
   vocabularies, and a worked example action array. */
var TRAINED_SYSTEM = 'You are Stack Doctor, an expert AI agent that diagnoses inference-stack incidents.\nYou are methodical: first inspect logs and config, then query specialists to cross-verify (some lie), then apply a fix and submit.\n\nAvailable actions (output as a JSON array):\n {"type":"inspect","target":"logs"} or "config" or "snippet" or "metrics"\n {"type":"ask_specialist","specialist":"runtime"} or "dispatch" or "kernel" or "loader"\n {"type":"apply_fix","fix":"<name>"} — available fixes: add_whitelist_entry, fix_comm_config, fix_quantization, fix_runtime_path, fix_weight_mapping, relax_arch_check, switch_backend, tune_memory_config, update_driver_config, update_model_config\n {"type":"submit","root_cause":"<cause>","fix":"<fix>","justification":"<detailed reasoning>"}\n\nAvailable root causes: arch_guard, backend_selector, backend_whitelist, distributed_comm, driver_compat, memory_oom, model_config, quantization_error, runtime_loader, weight_layout\n\nIMPORTANT: Pick ONE target per inspect, ONE specialist per query. Investigate before submitting. Give a detailed justification.\n\nExample output:\n[{"type":"inspect","target":"logs"},{"type":"inspect","target":"config"},{"type":"ask_specialist","specialist":"kernel"},{"type":"apply_fix","fix":"relax_arch_check"},{"type":"submit","root_cause":"arch_guard","fix":"relax_arch_check","justification":"Logs show architecture check failure for SM90 on the backend. Config confirms the guard is enabled. Kernel specialist confirmed this is not a kernel issue. Relaxing the arch check resolves the incompatibility."}]';
|
| 1327 |
+
|
| 1328 |
+
/* Run the untrained model, then the trained model, on one pinned scenario
   so the two episodes are directly comparable. */
async function runComparison() {
  disableButtons();

  // 1. Pick a random scenario ID to pin both runs to the same problem
  var pool = ['arch_guard_01','arch_guard_02','backend_whitelist_01','backend_whitelist_02','runtime_loader_01','runtime_loader_02','backend_selector_01'];
  var pinned = pool[Math.floor(Math.random() * pool.length)];

  // 2. Untrained pass
  await runLiveModel('untrained', pinned);

  // 3. Breather + transition banner before the second pass
  await sleep(3000);
  addLogEntry({ type: 'inspect', label: '--- Now running TRAINED model on same scenario ---', reward: 0, text: 'Same incident: ' + pinned });
  await sleep(2000);
  resetState();

  // 4. Trained pass on the identical incident
  await runLiveModel('trained', pinned);

  enableButtons();
}
|
| 1349 |
+
|
| 1350 |
+
async function runLiveModel(mode, scenarioId) {
|
| 1351 |
+
resetState(); setConnStatus('Connecting...', 'running');
|
| 1352 |
+
var isTrained = mode === 'trained';
|
| 1353 |
+
var ws = null;
|
| 1354 |
+
|
| 1355 |
+
try {
|
| 1356 |
+
// 1. Raw WebSocket connect (derive WS URL from server URL or same-origin)
|
| 1357 |
+
var base = getServerUrl();
|
| 1358 |
+
var wsUrl;
|
| 1359 |
+
if (!base) {
|
| 1360 |
+
var proto = location.protocol === 'https:' ? 'wss:' : 'ws:';
|
| 1361 |
+
wsUrl = proto + '//' + location.host + '/ws';
|
| 1362 |
+
} else {
|
| 1363 |
+
wsUrl = base.replace(/^http/, 'ws') + '/ws';
|
| 1364 |
+
}
|
| 1365 |
+
ws = new WebSocket(wsUrl);
|
| 1366 |
+
await new Promise(function(res, rej) {
|
| 1367 |
+
ws.onopen = res;
|
| 1368 |
+
ws.onerror = function() { rej(new Error('WebSocket connect failed')); };
|
| 1369 |
+
setTimeout(function() { rej(new Error('WebSocket timeout')); }, 5000);
|
| 1370 |
+
});
|
| 1371 |
+
|
| 1372 |
+
function wsSendRaw(type, data) {
|
| 1373 |
+
return new Promise(function(res, rej) {
|
| 1374 |
+
ws.onmessage = function(e) { res(JSON.parse(e.data)); };
|
| 1375 |
+
ws.send(JSON.stringify({type: type, data: data || {}}));
|
| 1376 |
+
setTimeout(function() { rej(new Error('WS ' + type + ' timeout')); }, 15000);
|
| 1377 |
+
});
|
| 1378 |
+
}
|
| 1379 |
+
|
| 1380 |
+
// 2. Reset with pinned scenario ID (same scenario for both runs)
|
| 1381 |
+
var resetData = scenarioId ? { scenario_id: scenarioId } : {};
|
| 1382 |
+
var resetMsg = await wsSendRaw('reset', resetData);
|
| 1383 |
+
if (resetMsg.type === 'error') throw new Error('Reset: ' + JSON.stringify(resetMsg.data));
|
| 1384 |
+
var obs = resetMsg.data.observation;
|
| 1385 |
+
setConnStatus('Connected', 'connected');
|
| 1386 |
+
startTimer();
|
| 1387 |
+
setStatus('Incident Received', 'warning');
|
| 1388 |
+
|
| 1389 |
+
document.getElementById('scenarioId').textContent = 'live';
|
| 1390 |
+
document.getElementById('detail-model').textContent = obs.model_name || '';
|
| 1391 |
+
document.getElementById('detail-backend').textContent = obs.backend || '';
|
| 1392 |
+
document.getElementById('detail-driver').textContent = obs.hardware || '';
|
| 1393 |
+
|
| 1394 |
+
addIncidentCard({ incident_ticket: obs.incident_ticket, hardware: obs.hardware, model_name: obs.model_name, backend: obs.backend });
|
| 1395 |
+
populateSpecialists(obs.specialist_opinions || {});
|
| 1396 |
+
await sleep(1000);
|
| 1397 |
+
|
| 1398 |
+
// 3. Build prompt from raw observation
|
| 1399 |
+
var opsStr = '';
|
| 1400 |
+
var specs = obs.specialist_opinions || {};
|
| 1401 |
+
for (var name in specs) {
|
| 1402 |
+
var o = specs[name];
|
| 1403 |
+
opsStr += ' ' + name + ': ' + o.opinion + ' (confidence: ' + o.confidence + ')\n';
|
| 1404 |
+
}
|
| 1405 |
+
var userPrompt = 'INCIDENT: ' + obs.incident_ticket + '\nHardware: ' + obs.hardware + ' | Model: ' + obs.model_name + ' | Backend: ' + obs.backend + '\nLOG:\n' + obs.log_excerpt + '\nSPECIALISTS:\n' + opsStr + '\nInvestigate and submit your diagnosis as a JSON action array.';
|
| 1406 |
+
|
| 1407 |
+
// 4. Call inference server
|
| 1408 |
+
setStatus(isTrained ? 'Trained Model Thinking...' : 'Untrained Model Thinking...', 'warning');
|
| 1409 |
+
addLogEntry({ type: 'inspect', label: 'Qwen 1.5B ' + (isTrained ? '(GRPO)' : '(Base)') + ' Generating...', reward: 0, text: 'Sending scenario to local MLX model for inference...' });
|
| 1410 |
+
|
| 1411 |
+
var genResp = await fetch(INFERENCE_URL + '/generate', {
|
| 1412 |
+
method: 'POST', headers: { 'Content-Type': 'application/json' },
|
| 1413 |
+
body: JSON.stringify({ prompt: userPrompt, max_tokens: 512, mode: isTrained ? 'trained' : 'untrained', system: isTrained ? TRAINED_SYSTEM : UNTRAINED_SYSTEM })
|
| 1414 |
+
}).then(function(r) { return r.json(); });
|
| 1415 |
+
|
| 1416 |
+
addLogEntry({ type: 'inspect', label: 'Model Output (' + genResp.gen_time.toFixed(1) + 's)', reward: 0, text: genResp.text.slice(0, 500) });
|
| 1417 |
+
await sleep(500);
|
| 1418 |
+
|
| 1419 |
+
// 5. Parse actions
|
| 1420 |
+
var actions = extractActionsJS(genResp.text);
|
| 1421 |
+
if (!actions || actions.length === 0) {
|
| 1422 |
+
addLogEntry({ type: 'submit', label: 'Parse Failed', reward: -5, text: 'Could not parse model output as JSON actions.\nRaw: ' + genResp.text.slice(0, 200) });
|
| 1423 |
+
updateReward(-5);
|
| 1424 |
+
state.done = true; setStatus('Parse Error', 'error'); stopTimer();
|
| 1425 |
+
ws.close(); enableButtons(); return;
|
| 1426 |
+
}
|
| 1427 |
+
|
| 1428 |
+
setStatus('Executing Actions', 'warning');
|
| 1429 |
+
|
| 1430 |
+
// 6. Execute each action via raw WebSocket
|
| 1431 |
+
var done = false;
|
| 1432 |
+
var lastOutput = '';
|
| 1433 |
+
var totalReward = 0;
|
| 1434 |
+
for (var i = 0; i < actions.length && !done; i++) {
|
| 1435 |
+
var action = actions[i];
|
| 1436 |
+
var aType = action.type || '?';
|
| 1437 |
+
state.step = i + 1; updateSteps(i + 1);
|
| 1438 |
+
|
| 1439 |
+
// Visual feedback on architecture diagram
|
| 1440 |
+
if (aType === 'inspect') setLayerState('runtime', 'scanning');
|
| 1441 |
+
if (aType === 'ask_specialist') highlightSpecialist(action.specialist || 'dispatch');
|
| 1442 |
+
if (aType === 'apply_fix') setLayerState('backend', 'identified');
|
| 1443 |
+
if (aType === 'submit') { setStatus('Diagnosing', 'warning'); document.getElementById('rootCauseValue').textContent = action.root_cause || '?'; }
|
| 1444 |
+
|
| 1445 |
+
var stepMsg = await wsSendRaw('step', { message: JSON.stringify(action) });
|
| 1446 |
+
|
| 1447 |
+
var rew = 0;
|
| 1448 |
+
var stepOutput = '';
|
| 1449 |
+
if (stepMsg.type === 'error') {
|
| 1450 |
+
rew = -2;
|
| 1451 |
+
stepOutput = 'Error: ' + ((stepMsg.data && stepMsg.data.message) || 'unknown');
|
| 1452 |
+
} else {
|
| 1453 |
+
rew = stepMsg.data.reward || 0;
|
| 1454 |
+
done = stepMsg.data.done || false;
|
| 1455 |
+
stepOutput = (stepMsg.data.observation && stepMsg.data.observation.output) || '';
|
| 1456 |
+
}
|
| 1457 |
+
totalReward += rew;
|
| 1458 |
+
|
| 1459 |
+
var label = aType;
|
| 1460 |
+
if (aType === 'inspect') label = 'Inspect: ' + (action.target || '?');
|
| 1461 |
+
if (aType === 'ask_specialist') label = 'Query: ' + (action.specialist || '?');
|
| 1462 |
+
if (aType === 'apply_fix') label = 'Fix: ' + (action.fix || '?');
|
| 1463 |
+
if (aType === 'submit') label = 'Submit Diagnosis';
|
| 1464 |
+
|
| 1465 |
+
addLogEntry({ type: aType, label: label, reward: rew, text: stepOutput });
|
| 1466 |
+
updateReward(rew);
|
| 1467 |
+
lastOutput = stepOutput;
|
| 1468 |
+
|
| 1469 |
+
// Green up layers on successful fix
|
| 1470 |
+
if (aType === 'apply_fix' && rew > 0) {
|
| 1471 |
+
['backend', 'runtime', 'model', 'memory', 'driver'].forEach(function(l, idx) {
|
| 1472 |
+
setTimeout(function() { setLayerState(l, 'resolved'); }, idx * 300);
|
| 1473 |
+
});
|
| 1474 |
+
}
|
| 1475 |
+
await sleep(1200);
|
| 1476 |
+
}
|
| 1477 |
+
|
| 1478 |
+
// 7. Final diagnosis
|
| 1479 |
+
state.done = true; stopTimer();
|
| 1480 |
+
if (lastOutput.indexOf('DIAGNOSIS SUBMITTED') !== -1 || lastOutput.indexOf('Root cause') !== -1 || done) {
|
| 1481 |
+
var rcCorrect = lastOutput.indexOf('CORRECT') !== -1 && lastOutput.indexOf('Root cause') !== -1 && lastOutput.split('Root cause')[1].split('\n')[0].indexOf('CORRECT') !== -1;
|
| 1482 |
+
setStatus('Diagnosis Submitted', totalReward > 0 ? 'success' : 'error');
|
| 1483 |
+
document.getElementById('diagnosisValue').textContent = totalReward > 0 ? '\u2713 Correct' : '\u2717 Incorrect';
|
| 1484 |
+
document.getElementById('diagnosisValue').className = 'vital-value ' + (totalReward > 0 ? 'emerald' : 'coral');
|
| 1485 |
+
|
| 1486 |
+
var submitAction = actions.find(function(a) { return a.type === 'submit'; }) || {};
|
| 1487 |
+
var correctRc = submitAction.root_cause || '?';
|
| 1488 |
+
var correctFix = submitAction.fix || '?';
|
| 1489 |
+
var rcWrong = lastOutput.match(/WRONG \(was: (\w+)\)/);
|
| 1490 |
+
if (rcWrong) correctRc = rcWrong[1];
|
| 1491 |
+
var fixWrong = lastOutput.match(/Fix:.*WRONG \(was: (\w+)\)/);
|
| 1492 |
+
if (fixWrong) correctFix = fixWrong[1];
|
| 1493 |
+
|
| 1494 |
+
showDiagnosis({
|
| 1495 |
+
rcCorrect: rcCorrect && !rcWrong, fixCorrect: !fixWrong,
|
| 1496 |
+
rootCause: submitAction.root_cause || '?', fix: submitAction.fix || '?',
|
| 1497 |
+
correctRc: correctRc, correctFix: correctFix,
|
| 1498 |
+
totalReward: state.reward
|
| 1499 |
+
});
|
| 1500 |
+
|
| 1501 |
+
await sleep(8000);
|
| 1502 |
+
document.getElementById('diagnosisOverlay').classList.remove('visible');
|
| 1503 |
+
} else {
|
| 1504 |
+
setStatus('Episode Ended', totalReward > 0 ? 'success' : 'error');
|
| 1505 |
+
}
|
| 1506 |
+
|
| 1507 |
+
} catch (e) {
|
| 1508 |
+
console.error('Live model error:', e);
|
| 1509 |
+
setConnStatus('Error: ' + e.message, 'error');
|
| 1510 |
+
setStatus('Error', 'error');
|
| 1511 |
+
alert('ERROR: ' + e.message);
|
| 1512 |
+
addLogEntry({ type: 'submit', label: 'ERROR', reward: 0, text: e.message + '\n' + (e.stack || '') });
|
| 1513 |
+
}
|
| 1514 |
+
if (ws) ws.close();
|
| 1515 |
+
enableButtons();
|
| 1516 |
+
}
|
| 1517 |
+
|
| 1518 |
+
/* Root cause inference from observation text — pattern matching on known signatures */
/* Canonical mapping: each diagnosable root cause -> the fix action that resolves it. */
var RC_TO_FIX = {
    arch_guard:         'relax_arch_check',
    backend_whitelist:  'add_whitelist_entry',
    runtime_loader:     'fix_runtime_path',
    backend_selector:   'switch_backend',
    model_config:       'update_model_config',
    weight_layout:      'fix_weight_mapping',
    memory_oom:         'tune_memory_config',
    quantization_error: 'fix_quantization',
    distributed_comm:   'fix_comm_config',
    driver_compat:      'update_driver_config'
};
|
| 1526 |
+
|
| 1527 |
+
/*
 * Guess the most likely root-cause label from free-form observation text.
 * Checks known failure signatures in priority order (case-insensitive
 * substring matching) and falls back to 'arch_guard' when nothing matches.
 */
function inferRootCause(text) {
    var t = text.toLowerCase();
    // Small helper so each signature reads as a plain predicate.
    function has(sub) { return t.indexOf(sub) !== -1; }

    if (has('arch') && (has('guard') || has('unsupported') || has('architecture check'))) {
        return 'arch_guard';
    }
    if (has('whitelist') || has('not in supported')) {
        return 'backend_whitelist';
    }
    if (has('runtime') && (has('loader') || has('path') || has('dlopen'))) {
        return 'runtime_loader';
    }
    if (has('backend') && (has('selector') || has('fallback'))) {
        return 'backend_selector';
    }
    if (has('model') && has('config') && (has('mismatch') || has('invalid'))) {
        return 'model_config';
    }
    if (has('weight') && (has('layout') || has('shape'))) {
        return 'weight_layout';
    }
    if (has('oom') || has('out of memory') || (has('memory') && has('exceed'))) {
        return 'memory_oom';
    }
    if (has('quantiz') || (has('quant') && has('error'))) {
        return 'quantization_error';
    }
    if (has('nccl') || has('distributed') || has('comm')) {
        return 'distributed_comm';
    }
    if (has('driver') && (has('compat') || has('version'))) {
        return 'driver_compat';
    }
    return 'arch_guard'; // fallback when no signature matched
}
|
| 1541 |
+
|
| 1542 |
+
/* Poll model status on page load */
/*
 * Named IIFE: queries GET /model_status on the inference server and mirrors
 * the result into the #modelStatus badge. While the model is still loading it
 * re-schedules itself every 3 s; on "ready" or "error" polling stops. A fetch
 * failure is treated as a local-dev setup without the endpoint, so the badge
 * is optimistically set to ready.
 */
(function checkModelStatus() {
    var base = INFERENCE_URL || '';
    var badge = document.getElementById('modelStatus');

    // One place to update both the badge text and its CSS state class.
    function show(text, cls) {
        badge.textContent = text;
        badge.className = 'conn-status ' + cls;
    }

    fetch(base + '/model_status')
        .then(function(resp) { return resp.json(); })
        .then(function(status) {
            if (status.ready) {
                show('Model: ready', 'connected');
            } else if (status.error) {
                show('Model: error', 'error');
            } else {
                show('Model: loading...', 'running');
                setTimeout(checkModelStatus, 3000); // keep polling until resolved
            }
        })
        .catch(function() {
            /* Local dev without /model_status endpoint — assume ready */
            show('Model: ready', 'connected');
        });
})();
|
| 1564 |
+
</script>
|
| 1565 |
+
</body>
|
| 1566 |
+
</html>
|