ztothez committed on
Commit
4ab37ac
·
1 Parent(s): ed2a586

feat: add Qwen validator sidecar metadata

Browse files
agents/verifier_agent.py CHANGED
@@ -18,7 +18,12 @@ Verify whether high-fidelity red-team artifacts are covered by detection and res
18
  """),
19
  ]
20
  content, metric = invoke_with_metrics(chat, messages, "verifier_agent")
 
 
 
21
  return {
22
  "verifier_output": content,
 
 
23
  "metrics": merge_metrics(state, metric),
24
  }
 
18
  """),
19
  ]
20
  content, metric = invoke_with_metrics(chat, messages, "verifier_agent")
21
+ verifier_model = metric.get("model") or "Unknown verifier model"
22
+ verifier_model_role = metric.get("model_role") or metric.get("requested_role") or "unknown"
23
+
24
  return {
25
  "verifier_output": content,
26
+ "verifier_model": verifier_model,
27
+ "verifier_model_role": verifier_model_role,
28
  "metrics": merge_metrics(state, metric),
29
  }
demo_output.py CHANGED
@@ -688,5 +688,7 @@ DEMO_INVOKE_RESULT = {
688
  "blue_output": DEMO_BLUE_OUTPUT,
689
  "response_output": DEMO_RESPONSE_OUTPUT,
690
  "verifier_output": DEMO_VERIFIER_OUTPUT,
 
 
691
  "metrics": DEMO_METRICS,
692
  }
 
688
  "blue_output": DEMO_BLUE_OUTPUT,
689
  "response_output": DEMO_RESPONSE_OUTPUT,
690
  "verifier_output": DEMO_VERIFIER_OUTPUT,
691
+ "verifier_model": "Qwen Validator Demo",
692
+ "verifier_model_role": "qwen",
693
  "metrics": DEMO_METRICS,
694
  }
frontend/src/app/page.tsx CHANGED
@@ -120,6 +120,12 @@ export default function SOCCommandCenter() {
120
  const heroPillDot = endpointOk ? "bg-aegis-green shadow-[0_0_6px_#22C55E]" : "bg-aegis-amber shadow-[0_0_6px_#F59E0B]";
121
  const heroPillText = endpointOk ? `LIVE · VLLM ON ROCM · MI300X · ${modelName}` : "OFFLINE · DEMO FALLBACK ACTIVE";
122
 
 
 
 
 
 
 
123
  // ── Sub-Component: Artifact Grid (Canonical Naming) ────────────────────────
124
  const ArtifactGridComponent = ({ run }: { run: RunResult }) => {
125
  // Generate canonical file prefix
@@ -449,7 +455,17 @@ export default function SOCCommandCenter() {
449
  {/* LATEST REPORT HERO CARD */}
450
  <div className="bg-gradient-to-br from-aegis-panel-3 to-aegis-panel border border-aegis-border-purple rounded-xl p-6 mb-6 shadow-aegis-card relative overflow-hidden">
451
  <div className="absolute top-0 right-0 p-4 opacity-10 font-mono font-bold text-8xl text-aegis-purple">RPT</div>
452
- <div className="font-sans font-bold text-[10px] leading-none text-aegis-purple-soft tracking-[0.14em] uppercase mb-4">▸ Latest Generated Report</div>
 
 
 
 
 
 
 
 
 
 
453
 
454
  <div className="flex justify-between items-start">
455
  <div>
@@ -673,7 +689,7 @@ export default function SOCCommandCenter() {
673
  { id: "red", label: "Threat Agent", model: "Llama 3.3 70B", top: "border-t-aegis-purple", text: "text-aegis-purple-soft", route: "primary" },
674
  { id: "blue", label: "Detection Agent", model: "Llama 3.3 70B", top: "border-t-aegis-blue", text: "text-aegis-blue-soft", route: "primary" },
675
  { id: "response", label: "Response Agent", model: "Llama 3.3 70B", top: "border-t-aegis-amber", text: "text-aegis-amber-soft", route: "primary" },
676
- { id: "verifier", label: "Validation Agent", model: "Qwen / QwQ", top: "border-t-aegis-purple", text: "text-aegis-purple-soft", route: "qwen_validator" },
677
  ].map((agent, i) => {
678
  const isRunning = activeAgent === agent.id;
679
  const elapsed = agentTimes[agent.id];
 
120
  const heroPillDot = endpointOk ? "bg-aegis-green shadow-[0_0_6px_#22C55E]" : "bg-aegis-amber shadow-[0_0_6px_#F59E0B]";
121
  const heroPillText = endpointOk ? `LIVE · VLLM ON ROCM · MI300X · ${modelName}` : "OFFLINE · DEMO FALLBACK ACTIVE";
122
 
123
+ const verifierModel = results?.verifier_model ?? "";
124
+ const verifierRole = results?.verifier_model_role ?? "";
125
+ const qwenAudited =
126
+ verifierModel.toLowerCase().includes("qwen") ||
127
+ verifierRole.toLowerCase().includes("qwen");
128
+
129
  // ── Sub-Component: Artifact Grid (Canonical Naming) ────────────────────────
130
  const ArtifactGridComponent = ({ run }: { run: RunResult }) => {
131
  // Generate canonical file prefix
 
455
  {/* LATEST REPORT HERO CARD */}
456
  <div className="bg-gradient-to-br from-aegis-panel-3 to-aegis-panel border border-aegis-border-purple rounded-xl p-6 mb-6 shadow-aegis-card relative overflow-hidden">
457
  <div className="absolute top-0 right-0 p-4 opacity-10 font-mono font-bold text-8xl text-aegis-purple">RPT</div>
458
+ <div className="flex items-center gap-2 mb-4">
459
+ <div className="font-sans font-bold text-[10px] leading-none text-aegis-purple-soft tracking-[0.14em] uppercase">
460
+ ▸ Latest Generated Report
461
+ </div>
462
+
463
+ {qwenAudited && (
464
+ <div className="inline-flex items-center rounded border border-aegis-border-purple bg-aegis-tint-purple px-2 py-1 font-mono text-[9px] font-bold uppercase tracking-[0.08em] text-aegis-purple-soft">
465
+ Audited by {verifierModel}
466
+ </div>
467
+ )}
468
+ </div>
469
 
470
  <div className="flex justify-between items-start">
471
  <div>
 
689
  { id: "red", label: "Threat Agent", model: "Llama 3.3 70B", top: "border-t-aegis-purple", text: "text-aegis-purple-soft", route: "primary" },
690
  { id: "blue", label: "Detection Agent", model: "Llama 3.3 70B", top: "border-t-aegis-blue", text: "text-aegis-blue-soft", route: "primary" },
691
  { id: "response", label: "Response Agent", model: "Llama 3.3 70B", top: "border-t-aegis-amber", text: "text-aegis-amber-soft", route: "primary" },
692
+ { id: "verifier", label: "Validation Agent", model: qwenAudited ? verifierModel : "Qwen-ready Validator", top: "border-t-aegis-purple", text: "text-aegis-purple-soft", route: qwenAudited ? "qwen_validator" : "validator_ready" },
693
  ].map((agent, i) => {
694
  const isRunning = activeAgent === agent.id;
695
  const elapsed = agentTimes[agent.id];
frontend/src/types/aegis.ts CHANGED
@@ -10,6 +10,8 @@ export interface Scores {
10
 
11
  export interface RunResult {
12
  technique_id: string; mode: string;
 
 
13
  scores: Scores;
14
  outputs: { red: string; blue: string; response: string; verifier: string };
15
  artifacts: { sigma: string; splunk: string; raw_red: string; raw_blue: string };
 
10
 
11
  export interface RunResult {
12
  technique_id: string; mode: string;
13
+ verifier_model?: string;
14
+ verifier_model_role?: string;
15
  scores: Scores;
16
  outputs: { red: string; blue: string; response: string; verifier: string };
17
  artifacts: { sigma: string; splunk: string; raw_red: string; raw_blue: string };
server.py CHANGED
@@ -104,6 +104,8 @@ def _build_response(result: dict, tid: str) -> dict:
104
  return {
105
  "status": "success",
106
  "technique_id": tid,
 
 
107
  "outputs": {
108
  "red": red,
109
  "blue": blue,
@@ -163,16 +165,31 @@ async def _stream_demo(technique_id: str) -> AsyncIterator[str]:
163
  ("response", "response_output", "Response Agent", 2.4),
164
  ("verifier", "verifier_output", "Validation Agent", 1.9),
165
  ]
166
- yield _sse("start", {"demo": True, "technique_id": technique_id,
167
- "pipeline_version": "aegisops-production-hybrid-v1"})
 
 
 
 
168
  for key, field, label, delay in stages:
169
  yield _sse("agent_start", {"agent": key, "label": label})
170
  await asyncio.sleep(delay)
171
- yield _sse("agent_done", {"agent": key, "label": label, "output": result.get(field, "")})
172
-
 
 
 
 
173
  full = _build_response(result, technique_id)
174
- yield _sse("done", {"demo": True, "metrics": full["metrics"],
175
- "artifacts": full["artifacts"], "scores": full["scores"]})
 
 
 
 
 
 
 
176
 
177
  def _run_node(node_name: str, state: dict) -> dict:
178
  from agents.red_agent import run_red_agent
@@ -226,8 +243,14 @@ async def _stream_live(technique_id: str, mode: str) -> AsyncIterator[str]:
226
  all_results.append(state)
227
  # Yield a sub-completion for multi-technique chains
228
  full_sub = _build_response(state, tid)
229
- yield _sse("done", {"demo": False, "metrics": full_sub["metrics"],
230
- "artifacts": full_sub["artifacts"], "scores": full_sub["scores"]})
 
 
 
 
 
 
231
 
232
  # ── Endpoints ─────────────────────────────────────────────────────────────────
233
  @api.post("/run")
 
104
  return {
105
  "status": "success",
106
  "technique_id": tid,
107
+ "verifier_model": result.get("verifier_model", "Unknown verifier model"),
108
+ "verifier_model_role": result.get("verifier_model_role", "unknown"),
109
  "outputs": {
110
  "red": red,
111
  "blue": blue,
 
165
  ("response", "response_output", "Response Agent", 2.4),
166
  ("verifier", "verifier_output", "Validation Agent", 1.9),
167
  ]
168
+ yield _sse("start", {
169
+ "demo": True,
170
+ "technique_id": technique_id,
171
+ "pipeline_version": "aegisops-production-hybrid-v1",
172
+ })
173
+
174
  for key, field, label, delay in stages:
175
  yield _sse("agent_start", {"agent": key, "label": label})
176
  await asyncio.sleep(delay)
177
+ yield _sse("agent_done", {
178
+ "agent": key,
179
+ "label": label,
180
+ "output": result.get(field, ""),
181
+ })
182
+
183
  full = _build_response(result, technique_id)
184
+ yield _sse("done", {
185
+ "demo": True,
186
+ "metrics": full["metrics"],
187
+ "artifacts": full["artifacts"],
188
+ "scores": full["scores"],
189
+ "verifier_model": full.get("verifier_model"),
190
+ "verifier_model_role": full.get("verifier_model_role"),
191
+ })
192
+
193
 
194
  def _run_node(node_name: str, state: dict) -> dict:
195
  from agents.red_agent import run_red_agent
 
243
  all_results.append(state)
244
  # Yield a sub-completion for multi-technique chains
245
  full_sub = _build_response(state, tid)
246
+ yield _sse("done", {
247
+ "demo": False,
248
+ "metrics": full_sub["metrics"],
249
+ "artifacts": full_sub["artifacts"],
250
+ "scores": full_sub["scores"],
251
+ "verifier_model": full_sub.get("verifier_model"),
252
+ "verifier_model_role": full_sub.get("verifier_model_role"),
253
+ })
254
 
255
  # ── Endpoints ─────────────────────────────────────────────────────────────────
256
  @api.post("/run")