Spaces:
Running
Running
ztothez committed on
Commit ·
4ab37ac
1
Parent(s): ed2a586
feat: add Qwen validator sidecar metadata
Browse files- agents/verifier_agent.py +5 -0
- demo_output.py +2 -0
- frontend/src/app/page.tsx +18 -2
- frontend/src/types/aegis.ts +2 -0
- server.py +31 -8
agents/verifier_agent.py
CHANGED
|
@@ -18,7 +18,12 @@ Verify whether high-fidelity red-team artifacts are covered by detection and res
|
|
| 18 |
"""),
|
| 19 |
]
|
| 20 |
content, metric = invoke_with_metrics(chat, messages, "verifier_agent")
|
|
|
|
|
|
|
|
|
|
| 21 |
return {
|
| 22 |
"verifier_output": content,
|
|
|
|
|
|
|
| 23 |
"metrics": merge_metrics(state, metric),
|
| 24 |
}
|
|
|
|
| 18 |
"""),
|
| 19 |
]
|
| 20 |
content, metric = invoke_with_metrics(chat, messages, "verifier_agent")
|
| 21 |
+
verifier_model = metric.get("model") or "Unknown verifier model"
|
| 22 |
+
verifier_model_role = metric.get("model_role") or metric.get("requested_role") or "unknown"
|
| 23 |
+
|
| 24 |
return {
|
| 25 |
"verifier_output": content,
|
| 26 |
+
"verifier_model": verifier_model,
|
| 27 |
+
"verifier_model_role": verifier_model_role,
|
| 28 |
"metrics": merge_metrics(state, metric),
|
| 29 |
}
|
demo_output.py
CHANGED
|
@@ -688,5 +688,7 @@ DEMO_INVOKE_RESULT = {
|
|
| 688 |
"blue_output": DEMO_BLUE_OUTPUT,
|
| 689 |
"response_output": DEMO_RESPONSE_OUTPUT,
|
| 690 |
"verifier_output": DEMO_VERIFIER_OUTPUT,
|
|
|
|
|
|
|
| 691 |
"metrics": DEMO_METRICS,
|
| 692 |
}
|
|
|
|
| 688 |
"blue_output": DEMO_BLUE_OUTPUT,
|
| 689 |
"response_output": DEMO_RESPONSE_OUTPUT,
|
| 690 |
"verifier_output": DEMO_VERIFIER_OUTPUT,
|
| 691 |
+
"verifier_model": "Qwen Validator Demo",
|
| 692 |
+
"verifier_model_role": "qwen",
|
| 693 |
"metrics": DEMO_METRICS,
|
| 694 |
}
|
frontend/src/app/page.tsx
CHANGED
|
@@ -120,6 +120,12 @@ export default function SOCCommandCenter() {
|
|
| 120 |
const heroPillDot = endpointOk ? "bg-aegis-green shadow-[0_0_6px_#22C55E]" : "bg-aegis-amber shadow-[0_0_6px_#F59E0B]";
|
| 121 |
const heroPillText = endpointOk ? `LIVE · VLLM ON ROCM · MI300X · ${modelName}` : "OFFLINE · DEMO FALLBACK ACTIVE";
|
| 122 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
// ── Sub-Component: Artifact Grid (Canonical Naming) ────────────────────────
|
| 124 |
const ArtifactGridComponent = ({ run }: { run: RunResult }) => {
|
| 125 |
// Generate canonical file prefix
|
|
@@ -449,7 +455,17 @@ export default function SOCCommandCenter() {
|
|
| 449 |
{/* LATEST REPORT HERO CARD */}
|
| 450 |
<div className="bg-gradient-to-br from-aegis-panel-3 to-aegis-panel border border-aegis-border-purple rounded-xl p-6 mb-6 shadow-aegis-card relative overflow-hidden">
|
| 451 |
<div className="absolute top-0 right-0 p-4 opacity-10 font-mono font-bold text-8xl text-aegis-purple">RPT</div>
|
| 452 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 453 |
|
| 454 |
<div className="flex justify-between items-start">
|
| 455 |
<div>
|
|
@@ -673,7 +689,7 @@ export default function SOCCommandCenter() {
|
|
| 673 |
{ id: "red", label: "Threat Agent", model: "Llama 3.3 70B", top: "border-t-aegis-purple", text: "text-aegis-purple-soft", route: "primary" },
|
| 674 |
{ id: "blue", label: "Detection Agent", model: "Llama 3.3 70B", top: "border-t-aegis-blue", text: "text-aegis-blue-soft", route: "primary" },
|
| 675 |
{ id: "response", label: "Response Agent", model: "Llama 3.3 70B", top: "border-t-aegis-amber", text: "text-aegis-amber-soft", route: "primary" },
|
| 676 |
-
{ id: "verifier", label: "Validation Agent", model: "Qwen
|
| 677 |
].map((agent, i) => {
|
| 678 |
const isRunning = activeAgent === agent.id;
|
| 679 |
const elapsed = agentTimes[agent.id];
|
|
|
|
| 120 |
const heroPillDot = endpointOk ? "bg-aegis-green shadow-[0_0_6px_#22C55E]" : "bg-aegis-amber shadow-[0_0_6px_#F59E0B]";
|
| 121 |
const heroPillText = endpointOk ? `LIVE · VLLM ON ROCM · MI300X · ${modelName}` : "OFFLINE · DEMO FALLBACK ACTIVE";
|
| 122 |
|
| 123 |
+
const verifierModel = results?.verifier_model ?? "";
|
| 124 |
+
const verifierRole = results?.verifier_model_role ?? "";
|
| 125 |
+
const qwenAudited =
|
| 126 |
+
verifierModel.toLowerCase().includes("qwen") ||
|
| 127 |
+
verifierRole.toLowerCase().includes("qwen");
|
| 128 |
+
|
| 129 |
// ── Sub-Component: Artifact Grid (Canonical Naming) ────────────────────────
|
| 130 |
const ArtifactGridComponent = ({ run }: { run: RunResult }) => {
|
| 131 |
// Generate canonical file prefix
|
|
|
|
| 455 |
{/* LATEST REPORT HERO CARD */}
|
| 456 |
<div className="bg-gradient-to-br from-aegis-panel-3 to-aegis-panel border border-aegis-border-purple rounded-xl p-6 mb-6 shadow-aegis-card relative overflow-hidden">
|
| 457 |
<div className="absolute top-0 right-0 p-4 opacity-10 font-mono font-bold text-8xl text-aegis-purple">RPT</div>
|
| 458 |
+
<div className="flex items-center gap-2 mb-4">
|
| 459 |
+
<div className="font-sans font-bold text-[10px] leading-none text-aegis-purple-soft tracking-[0.14em] uppercase">
|
| 460 |
+
▸ Latest Generated Report
|
| 461 |
+
</div>
|
| 462 |
+
|
| 463 |
+
{qwenAudited && (
|
| 464 |
+
<div className="inline-flex items-center rounded border border-aegis-border-purple bg-aegis-tint-purple px-2 py-1 font-mono text-[9px] font-bold uppercase tracking-[0.08em] text-aegis-purple-soft">
|
| 465 |
+
Audited by {verifierModel}
|
| 466 |
+
</div>
|
| 467 |
+
)}
|
| 468 |
+
</div>
|
| 469 |
|
| 470 |
<div className="flex justify-between items-start">
|
| 471 |
<div>
|
|
|
|
| 689 |
{ id: "red", label: "Threat Agent", model: "Llama 3.3 70B", top: "border-t-aegis-purple", text: "text-aegis-purple-soft", route: "primary" },
|
| 690 |
{ id: "blue", label: "Detection Agent", model: "Llama 3.3 70B", top: "border-t-aegis-blue", text: "text-aegis-blue-soft", route: "primary" },
|
| 691 |
{ id: "response", label: "Response Agent", model: "Llama 3.3 70B", top: "border-t-aegis-amber", text: "text-aegis-amber-soft", route: "primary" },
|
| 692 |
+
{ id: "verifier", label: "Validation Agent", model: qwenAudited ? verifierModel : "Qwen-ready Validator", top: "border-t-aegis-purple", text: "text-aegis-purple-soft", route: qwenAudited ? "qwen_validator" : "validator_ready" },
|
| 693 |
].map((agent, i) => {
|
| 694 |
const isRunning = activeAgent === agent.id;
|
| 695 |
const elapsed = agentTimes[agent.id];
|
frontend/src/types/aegis.ts
CHANGED
|
@@ -10,6 +10,8 @@ export interface Scores {
|
|
| 10 |
|
| 11 |
export interface RunResult {
|
| 12 |
technique_id: string; mode: string;
|
|
|
|
|
|
|
| 13 |
scores: Scores;
|
| 14 |
outputs: { red: string; blue: string; response: string; verifier: string };
|
| 15 |
artifacts: { sigma: string; splunk: string; raw_red: string; raw_blue: string };
|
|
|
|
| 10 |
|
| 11 |
export interface RunResult {
|
| 12 |
technique_id: string; mode: string;
|
| 13 |
+
verifier_model?: string;
|
| 14 |
+
verifier_model_role?: string;
|
| 15 |
scores: Scores;
|
| 16 |
outputs: { red: string; blue: string; response: string; verifier: string };
|
| 17 |
artifacts: { sigma: string; splunk: string; raw_red: string; raw_blue: string };
|
server.py
CHANGED
|
@@ -104,6 +104,8 @@ def _build_response(result: dict, tid: str) -> dict:
|
|
| 104 |
return {
|
| 105 |
"status": "success",
|
| 106 |
"technique_id": tid,
|
|
|
|
|
|
|
| 107 |
"outputs": {
|
| 108 |
"red": red,
|
| 109 |
"blue": blue,
|
|
@@ -163,16 +165,31 @@ async def _stream_demo(technique_id: str) -> AsyncIterator[str]:
|
|
| 163 |
("response", "response_output", "Response Agent", 2.4),
|
| 164 |
("verifier", "verifier_output", "Validation Agent", 1.9),
|
| 165 |
]
|
| 166 |
-
yield _sse("start", {
|
| 167 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
for key, field, label, delay in stages:
|
| 169 |
yield _sse("agent_start", {"agent": key, "label": label})
|
| 170 |
await asyncio.sleep(delay)
|
| 171 |
-
yield _sse("agent_done", {
|
| 172 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
full = _build_response(result, technique_id)
|
| 174 |
-
yield _sse("done", {
|
| 175 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
|
| 177 |
def _run_node(node_name: str, state: dict) -> dict:
|
| 178 |
from agents.red_agent import run_red_agent
|
|
@@ -226,8 +243,14 @@ async def _stream_live(technique_id: str, mode: str) -> AsyncIterator[str]:
|
|
| 226 |
all_results.append(state)
|
| 227 |
# Yield a sub-completion for multi-technique chains
|
| 228 |
full_sub = _build_response(state, tid)
|
| 229 |
-
yield _sse("done", {
|
| 230 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
|
| 232 |
# ── Endpoints ─────────────────────────────────────────────────────────────────
|
| 233 |
@api.post("/run")
|
|
|
|
| 104 |
return {
|
| 105 |
"status": "success",
|
| 106 |
"technique_id": tid,
|
| 107 |
+
"verifier_model": result.get("verifier_model", "Unknown verifier model"),
|
| 108 |
+
"verifier_model_role": result.get("verifier_model_role", "unknown"),
|
| 109 |
"outputs": {
|
| 110 |
"red": red,
|
| 111 |
"blue": blue,
|
|
|
|
| 165 |
("response", "response_output", "Response Agent", 2.4),
|
| 166 |
("verifier", "verifier_output", "Validation Agent", 1.9),
|
| 167 |
]
|
| 168 |
+
yield _sse("start", {
|
| 169 |
+
"demo": True,
|
| 170 |
+
"technique_id": technique_id,
|
| 171 |
+
"pipeline_version": "aegisops-production-hybrid-v1",
|
| 172 |
+
})
|
| 173 |
+
|
| 174 |
for key, field, label, delay in stages:
|
| 175 |
yield _sse("agent_start", {"agent": key, "label": label})
|
| 176 |
await asyncio.sleep(delay)
|
| 177 |
+
yield _sse("agent_done", {
|
| 178 |
+
"agent": key,
|
| 179 |
+
"label": label,
|
| 180 |
+
"output": result.get(field, ""),
|
| 181 |
+
})
|
| 182 |
+
|
| 183 |
full = _build_response(result, technique_id)
|
| 184 |
+
yield _sse("done", {
|
| 185 |
+
"demo": True,
|
| 186 |
+
"metrics": full["metrics"],
|
| 187 |
+
"artifacts": full["artifacts"],
|
| 188 |
+
"scores": full["scores"],
|
| 189 |
+
"verifier_model": full.get("verifier_model"),
|
| 190 |
+
"verifier_model_role": full.get("verifier_model_role"),
|
| 191 |
+
})
|
| 192 |
+
|
| 193 |
|
| 194 |
def _run_node(node_name: str, state: dict) -> dict:
|
| 195 |
from agents.red_agent import run_red_agent
|
|
|
|
| 243 |
all_results.append(state)
|
| 244 |
# Yield a sub-completion for multi-technique chains
|
| 245 |
full_sub = _build_response(state, tid)
|
| 246 |
+
yield _sse("done", {
|
| 247 |
+
"demo": False,
|
| 248 |
+
"metrics": full_sub["metrics"],
|
| 249 |
+
"artifacts": full_sub["artifacts"],
|
| 250 |
+
"scores": full_sub["scores"],
|
| 251 |
+
"verifier_model": full_sub.get("verifier_model"),
|
| 252 |
+
"verifier_model_role": full_sub.get("verifier_model_role"),
|
| 253 |
+
})
|
| 254 |
|
| 255 |
# ── Endpoints ─────────────────────────────────────────────────────────────────
|
| 256 |
@api.post("/run")
|