"""Multi-agent debate engine: Analyst vs Critic cross-examination with a Judge fallback (Du et al. 2023)."""

from __future__ import annotations

import json
import logging
import time
from typing import Any

from crewai import Agent, Task, Crew, Process

from config import SYSTEM_CONSTITUTION, get_llm, degradation_status

logger = logging.getLogger("ThreatHunter.debate_engine")


RISK_LEVELS: dict[str, int] = {
    "LOW": 1,
    "MEDIUM": 2,
    "HIGH": 3,
    "CRITICAL": 4,
}

MAX_DEBATE_ROUNDS = 3
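
# Illustrative note: the ordinal map makes severity strings comparable, which is how
# _get_analyst_risk() below picks the most severe finding. For example:
#     max(["HIGH", "CRITICAL"], key=lambda s: RISK_LEVELS.get(s, 0))  # -> "CRITICAL"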


def _build_judge_agent() -> Agent:
    """
    Judge sub-agent: an independent third-party arbitrator.

    Design principles (Du et al. 2023):
    - Not an extension of either the Analyst or the Critic
    - Receives the full debate transcript and selects the best logically supported position
    - Security-conservative: when in doubt, prefer the higher risk rating
    """
    return Agent(
        role="Security Arbitration Judge",
        goal=(
            "Review the complete debate history between Analyst and Critic. "
            "Select the most logically supported risk assessment. "
            "In case of equal evidence, err on the side of caution (higher risk). "
            "Output a final JSON verdict."
        ),
        backstory=(
            f"{SYSTEM_CONSTITUTION}\n\n"
            "You are an impartial security arbitration judge. You were not involved in the debate. "
            "Your task is to read both sides' arguments and render a final, binding verdict. "
            "You must cite which round's argument was most persuasive and why. "
            "In security contexts, when evidence is ambiguous, choose the MORE SEVERE rating."
        ),
        llm=get_llm(),
        verbose=True,
        max_iter=3,
    )
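
# The judge agent is built lazily inside DebateEngine._judge_verdict(), so no extra
# LLM agent is instantiated unless a debate actually ends without consensus.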


class DebateEngine:
    """
    Implements the Multiagent Debate mechanism from Du et al. (2023).

    Flow:
        Round 1: Analyst states its initial position -> Critic challenges it
        Round 2: Analyst revises its position (with Critic feedback) -> Critic re-evaluates
        Round 3: Analyst gives its final position -> Critic gives its final assessment
        Final:   if there is still no consensus after 3 rounds -> the Judge sub-agent arbitrates
    """

    def __init__(self, max_rounds: int = MAX_DEBATE_ROUNDS):
        self.max_rounds = max_rounds
        self.min_rounds_with_findings = min(2, max_rounds)

    def run_debate(
        self,
        analyst_output: dict[str, Any],
        input_type: str = "pkg",
        on_progress: Any = None,
    ) -> dict[str, Any]:
        """
        Run the full debate loop.

        Args:
            analyst_output: Initial Analyst analysis result (from run_analyst_pipeline)
            input_type: Input type (pkg/code/config, etc.; affects Critic skill selection)
            on_progress: SSE progress callback

        Returns:
            Final verdict, in the same format as the run_critic_pipeline output
        """
        from agents.critic import run_critic_pipeline

        t0 = time.time()
        history: list[dict[str, Any]] = []
        current_analyst_output = analyst_output
        consensus = False
        consensus_round = 0
        early_stop_reason = ""
        final_critic_result: dict[str, Any] = {}
        has_findings = self._has_findings(analyst_output)
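
        # When the Analyst reported findings, a round-1 consensus alone does not end the
        # debate: at least min_rounds_with_findings (2) rounds run, unless the Critic
        # explicitly raises no challenge, so the rebuttal gets cross-examined once.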

        logger.info("[DEBATE] Starting %d-round debate (Du et al. 2023)", self.max_rounds)

        if on_progress:
            try:
                on_progress("debate", "RUNNING", {"step": "starting", "max_rounds": self.max_rounds})
            except Exception:
                pass

        for round_num in range(1, self.max_rounds + 1):
            logger.info("[DEBATE] Round %d/%d starting", round_num, self.max_rounds)

            if on_progress:
                try:
                    on_progress("debate", "RUNNING", {
                        "step": f"round_{round_num}",
                        "round": round_num,
                        "max_rounds": self.max_rounds,
                    })
                except Exception:
                    pass

            # Build this round's context, carrying over the previous Critic challenges.
            round_context = dict(current_analyst_output)
            round_context["_debate_round"] = round_num
            round_context["_debate_max_rounds"] = self.max_rounds
            if history:
                round_context["_prev_critic_challenges"] = history[-1].get("critic", {}).get("challenges", [])

            try:
                critic_result = run_critic_pipeline(round_context, input_type=input_type)
            except Exception as e:
                logger.warning("[DEBATE] Round %d Critic failed: %s -> using maintain verdict", round_num, e)
                critic_result = {
                    "verdict": "MAINTAIN",
                    "weighted_score": 70.0,
                    "challenges": [],
                    "reasoning": f"Critic failed in round {round_num}: {e}",
                    "_degraded": True,
                }

            history.append({
                "round": round_num,
                "analyst": current_analyst_output,
                "critic": critic_result,
            })

            final_critic_result = critic_result

            # Consensus check (with a minimum round count when findings exist).
            if self._check_consensus(current_analyst_output, critic_result):
                if has_findings and round_num < self.min_rounds_with_findings and not critic_result.get("no_challenge"):
                    logger.info(
                        "[DEBATE] Consensus detected at round %d, continuing to minimum %d rounds because findings exist",
                        round_num,
                        self.min_rounds_with_findings,
                    )
                    if round_num < self.max_rounds:
                        current_analyst_output = self._analyst_rebuttal(
                            current_analyst_output, critic_result, round_num
                        )
                    continue
                consensus = True
                consensus_round = round_num
                early_stop_reason = "consensus_after_min_rounds" if has_findings else "no_findings"
                logger.info(
                    "[DEBATE] Consensus reached at round %d | "
                    "analyst_risk=%s | critic_verdict=%s",
                    round_num,
                    self._get_analyst_risk(current_analyst_output),
                    critic_result.get("verdict", "UNKNOWN"),
                )
                break

            # No consensus yet: let the Analyst revise its position for the next round.
            if round_num < self.max_rounds:
                current_analyst_output = self._analyst_rebuttal(
                    current_analyst_output, critic_result, round_num
                )

        elapsed_ms = int((time.time() - t0) * 1000)

        if consensus:
            logger.info(
                "[DEBATE] Consensus after %d/%d rounds | elapsed=%dms",
                consensus_round, self.max_rounds, elapsed_ms,
            )
            result = dict(final_critic_result)
            result["_debate_meta"] = {
                "consensus": True,
                "consensus_round": consensus_round,
                "total_rounds": round_num,
                "elapsed_ms": elapsed_ms,
                "method": "multiagent_debate_Du2023",
                "early_stop_reason": early_stop_reason,
                "rounds": self._summarize_rounds(history),
            }
            return result
        else:
            logger.warning(
                "[DEBATE] No consensus after %d rounds -> invoking Judge sub-agent",
                self.max_rounds,
            )
            return self._judge_verdict(history, elapsed_ms)

    def _check_consensus(self, analyst_output: dict, critic_result: dict) -> bool:
        """
        Consensus check: the Critic must effectively agree with the Analyst's rating.

        CRITICAL vs HIGH        -> no consensus (a one-level gap on a severe finding keeps the debate alive)
        CRITICAL vs CRITICAL    -> consensus
        HIGH vs MEDIUM          -> no consensus
        HIGH vs HIGH            -> consensus
        Critic verdict=MAINTAIN -> immediate consensus (the Critic agrees with the Analyst)
        """
        verdict = critic_result.get("verdict", "MAINTAIN")
        if verdict == "MAINTAIN":
            return True

        # A DOWNGRADE only counts as consensus when the Critic is highly confident.
        if verdict == "DOWNGRADE":
            score = critic_result.get("weighted_score", 70)
            return score >= 80

        # ESCALATE (or any other verdict): accept when the Critic's confidence is reasonably high.
        score = critic_result.get("weighted_score", 70)
        return score >= 75
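
    # Worked example of the rule above: if the Critic answers DOWNGRADE with
    # weighted_score=62, no consensus is recorded and another round runs;
    # DOWNGRADE with weighted_score=85 is accepted as consensus.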

    def _has_findings(self, analyst_output: dict[str, Any]) -> bool:
        """Check whether the Analyst actually produced findings; if so, at least a second round of cross-examination is required."""
        for key in ("analysis", "vulnerabilities", "code_patterns", "code_patterns_summary"):
            value = analyst_output.get(key)
            if isinstance(value, list) and len(value) > 0:
                return True
        return False

    def _summarize_rounds(self, history: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """Produce a lightweight per-round summary so the UI/tests can confirm the debate did not collapse after round 1."""
        rounds = []
        for entry in history:
            critic = entry.get("critic", {})
            analyst = entry.get("analyst", {})
            rounds.append({
                "round": entry.get("round"),
                "analyst_risk": self._get_analyst_risk(analyst),
                "critic_verdict": critic.get("verdict"),
                "critic_score": critic.get("weighted_score"),
                "challenge_count": len(critic.get("challenges", [])),
            })
        return rounds

    def _get_analyst_risk(self, analyst_output: dict) -> str:
        """Extract the overall risk level from the Analyst output."""
        analysis = analyst_output.get("analysis", [])
        if not analysis:
            return "UNKNOWN"

        max_level = 0
        max_name = "UNKNOWN"
        for entry in analysis:
            sev = entry.get("severity") or entry.get("adjusted_risk", "MEDIUM")
            level = RISK_LEVELS.get(sev, 0)
            if level > max_level:
                max_level = level
                max_name = sev
        return max_name

    @staticmethod
    def _normalize_challenge(challenge: Any) -> dict[str, str]:
        """Support both string[] contracts and object-style challenge records."""
        if isinstance(challenge, dict):
            return {
                "type": str(challenge.get("type") or "challenge"),
                "description": str(challenge.get("description") or challenge.get("text") or challenge),
            }
        return {"type": "challenge", "description": str(challenge)}
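
    # Example: _normalize_challenge("missing PoC") returns
    #     {"type": "challenge", "description": "missing PoC"}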

    def _analyst_rebuttal(
        self,
        analyst_output: dict,
        critic_result: dict,
        round_num: int,
    ) -> dict:
        """
        Analyst position update (simplified: inject the Critic's challenges and annotate the reasoning).

        A full implementation would re-invoke the Analyst LLM here. The current
        lightweight version injects the Critic feedback directly into the output,
        so that the next round's Critic sees the updated context.
        """
        updated = dict(analyst_output)
        challenges = [
            self._normalize_challenge(challenge)
            for challenge in critic_result.get("challenges", [])
        ]
        updated["_critic_challenges_r"] = {
            "round": round_num,
            "challenges": challenges,
            "critic_verdict": critic_result.get("verdict"),
            "critic_score": critic_result.get("weighted_score", 70),
        }

        # Prepend a rebuttal marker so the next Critic round can see what was addressed.
        if updated.get("analysis"):
            for entry in updated["analysis"]:
                if "reasoning" in entry:
                    entry["reasoning"] = (
                        f"[Round {round_num} rebuttal after Critic verdict={critic_result.get('verdict')}: "
                        f"challenges={[c.get('type') for c in challenges[:3]]}] "
                        + entry["reasoning"]
                    )
        logger.info(
            "[DEBATE] Round %d analyst rebuttal: addressed %d challenges",
            round_num, len(challenges),
        )
        return updated
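
    # The injected "_critic_challenges_r" block looks like (illustrative values):
    #     {"round": 1, "challenges": [{"type": "challenge", "description": "..."}],
    #      "critic_verdict": "ESCALATE", "critic_score": 72}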

    def _judge_verdict(
        self,
        debate_history: list[dict],
        elapsed_ms: int,
    ) -> dict[str, Any]:
        """
        Judge sub-agent arbitration: a third party decides when no consensus is reached.

        Principles (Du et al. 2023 + security-conservative principle):
        - Read the full debate transcript
        - Select the most logically supported position
        - If evidence is comparable, choose the MORE SEVERE rating (security-conservative)
        """
        t_judge = time.time()
        logger.info("[DEBATE] Invoking Judge sub-agent...")

        # Format the full transcript for the Judge.
        debate_summary = self._format_debate_history(debate_history)

        try:
            judge_agent = _build_judge_agent()

            task_desc = (
                f"DEBATE HISTORY ({len(debate_history)} rounds):\n\n"
                f"{debate_summary}\n\n"
                f"No consensus was reached after {len(debate_history)} rounds.\n\n"
                f"Your task:\n"
                f"1. Review all rounds of argument and counter-argument\n"
                f"2. Identify which side presented stronger evidence\n"
                f"3. If evidence is equal, choose the HIGHER risk level (security-conservative principle)\n"
                f"4. Output a JSON verdict with:\n"
                f"   {{\"verdict\": \"MAINTAIN|DOWNGRADE|ESCALATE\", "
                f"\"weighted_score\": 0-100, "
                f"\"reasoning\": \"...\", "
                f"\"winning_round\": 1-3, "
                f"\"judge_note\": \"...\"}}\n\n"
                f"Output ONLY the JSON, no other text."
            )
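
            # A schema-conforming reply parses to something like (illustrative values):
            #     {"verdict": "ESCALATE", "weighted_score": 82, "reasoning": "...",
            #      "winning_round": 2, "judge_note": "..."}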

            task = Task(
                description=task_desc,
                expected_output="Pure JSON judge verdict",
                agent=judge_agent,
            )

            crew = Crew(
                agents=[judge_agent],
                tasks=[task],
                process=Process.sequential,
                verbose=False,
            )

            result_str = str(crew.kickoff()).strip()

            # Strip Markdown code fences if the model wrapped its JSON reply.
            if "```json" in result_str:
                result_str = result_str.split("```json")[1].split("```")[0].strip()
            elif "```" in result_str:
                parts = result_str.split("```")
                if len(parts) >= 3:
                    result_str = parts[1].strip()

            judge_verdict = json.loads(result_str)
            judge_elapsed = int((time.time() - t_judge) * 1000)

            logger.info(
                "[DEBATE] Judge verdict: %s (score=%s) in %dms",
                judge_verdict.get("verdict"),
                judge_verdict.get("weighted_score"),
                judge_elapsed,
            )

            judge_verdict["_debate_meta"] = {
                "consensus": False,
                "total_rounds": len(debate_history),
                "judge_invoked": True,
                "judge_elapsed_ms": judge_elapsed,
                "total_elapsed_ms": elapsed_ms + judge_elapsed,
                "method": "multiagent_debate_Du2023_with_judge",
                "rounds": self._summarize_rounds(debate_history),
            }
            return judge_verdict

        except Exception as e:
            logger.error("[DEBATE] Judge sub-agent failed: %s -> falling back to last Critic result", e)
            degradation_status.degrade("DebateJudge", str(e))

            # Conservative fallback: keep the last Critic result and MAINTAIN the Analyst's rating.
            last_critic = debate_history[-1].get("critic", {}) if debate_history else {}
            fallback = dict(last_critic)
            fallback["verdict"] = "MAINTAIN"
            fallback["_debate_meta"] = {
                "consensus": False,
                "total_rounds": len(debate_history),
                "judge_invoked": True,
                "judge_failed": True,
                "judge_error": str(e),
                "total_elapsed_ms": elapsed_ms,
                "method": "multiagent_debate_Du2023_judge_fallback",
                "rounds": self._summarize_rounds(debate_history),
            }
            return fallback

    def _format_debate_history(self, history: list[dict]) -> str:
        """Format the debate transcript into a readable string for the Judge."""
        lines = []
        for entry in history:
            r = entry.get("round", "?")
            analyst = entry.get("analyst", {})
            critic = entry.get("critic", {})

            # Analyst side of this round.
            risk = self._get_analyst_risk(analyst)
            findings_count = len(analyst.get("analysis", []))
            lines.append(f"=== Round {r} ===")
            lines.append(f"ANALYST: overall_risk={risk}, findings={findings_count}")

            # Show at most the top two findings, truncated.
            for f in analyst.get("analysis", [])[:2]:
                lines.append(f" - [{f.get('severity')}] {f.get('cwe_id', f.get('cve_id', 'N/A'))}: {f.get('reasoning', '')[:100]}")

            # Critic side of this round.
            verdict = critic.get("verdict", "UNKNOWN")
            score = critic.get("weighted_score", "?")
            lines.append(f"CRITIC: verdict={verdict}, score={score}")
            for raw_ch in critic.get("challenges", [])[:2]:
                ch = self._normalize_challenge(raw_ch)
                lines.append(f" - Challenge [{ch.get('type')}]: {ch.get('description', '')[:100]}")

            lines.append("")

        return "\n".join(lines)


def run_debate_pipeline(
    analyst_output: dict[str, Any],
    input_type: str = "pkg",
    on_progress: Any = None,
) -> dict[str, Any]:
    """
    Top-level entry point that runs the full debate flow.
    Drop-in replacement for the single-round Critic call in main.py.

    Args:
        analyst_output: Initial Analyst analysis result
        input_type: Input type
        on_progress: SSE progress callback

    Returns:
        Final debate verdict (same format as the run_critic_pipeline output, plus a
        _debate_meta field recording the debate state)
    """
    engine = DebateEngine(max_rounds=MAX_DEBATE_ROUNDS)

    try:
        return engine.run_debate(analyst_output, input_type=input_type, on_progress=on_progress)
    except Exception as e:
        logger.error("[DEBATE] DebateEngine failed: %s -> falling back to single Critic", e)
        degradation_status.degrade("DebateEngine", str(e))

        # Degraded path: run a single-round Critic without debate.
        try:
            from agents.critic import run_critic_pipeline
            result = run_critic_pipeline(analyst_output, input_type=input_type)
            result["_debate_meta"] = {
                "consensus": None,
                "engine_failed": True,
                "error": str(e),
                "fallback": "single_round_critic",
            }
            return result
        except Exception as e2:
            logger.error("[DEBATE] Critic fallback also failed: %s", e2)
            return {
                "verdict": "MAINTAIN",
                "weighted_score": 60.0,
                "challenges": [],
                "reasoning": f"Debate engine and Critic both failed: {e} / {e2}",
                "_degraded": True,
                "_debate_meta": {"engine_failed": True, "critic_failed": True},
            }
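

# Usage sketch (illustrative only; the analyst payload below shows just the fields this
# module reads, while the real run_analyst_pipeline output carries more detail):
#
#     analyst_output = {
#         "analysis": [
#             {"severity": "HIGH", "cwe_id": "CWE-502",
#              "reasoning": "Unsafe deserialization in the package's setup hook"},
#         ],
#     }
#     verdict = run_debate_pipeline(analyst_output, input_type="pkg")
#     print(verdict.get("verdict"), verdict.get("_debate_meta", {}).get("consensus"))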