Spaces:

lablab-ai-amd-developer-hackathon
/

Threat_Hunter

Running

App Files Files Community

Threat_Hunter / tools /attck_tool.py

EricChen2005

Deploy ThreatHunter - AMD MI300X + Qwen2.5-32B

c8d30bc 1 day ago

raw

history blame contribute delete

15.3 kB

	# tools/attck_tool.py
	# 功能：CWE → CAPEC → MITRE ATT&CK Technique 映射
	# 架構定位：補全 Intel Fusion 六維分析的 ATT&CK 維度（權重 10%）
	#
	# 為什麼用靜態映射（不用 API）：
	# - MITRE ATT&CK 官方 TAXII 伺服器沒有 CVE→Technique 直接查詢端點
	# - 研究：CAPEC 是 CWE 和 ATT&CK 之間最結構化的橋接
	# - 路徑：CVE description → CWE → CAPEC → ATT&CK Technique
	# - 來源：NIST NVD 提供 CWE，MITRE 官方提供 CAPEC→ATT&CK 對應
	#
	# 佐證：
	# - Center for Threat-Informed Defense (CTID) Mappings Explorer
	# - https://mappings-explorer.mitre.org/
	# - NopSec (2024): "CWE→CAPEC→ATT&CK is the most structured mapping path"
	# - GitHub: threatsurfer/cve-attack-mapper（參考實作）
	#
	# 使用方式：
	# from tools.attck_tool import lookup_attck_technique

	import json
	import logging
	import re

	# Module-level logger for this tool, registered under the name "ThreatHunter.attck_tool".
	logger = logging.getLogger("ThreatHunter.attck_tool")

	# ══════════════════════════════════════════════════════════════
	# CWE → ATT&CK technique and CAPEC mapping table
	# Source (per the original author): MITRE ATT&CK v14 + CAPEC 3.9
	# Covers the most common web / system vulnerability CWEs.
	#
	# Every entry carries the same six string fields, in this order:
	#   technique_id, technique_name, tactic, capec, capec_name, description
	# ══════════════════════════════════════════════════════════════

	CWE_TO_ATTCK: dict[str, dict] = {
	    # ── Injection ───────────────────────────────────────────
	    "CWE-89": {
	        "technique_id": "T1190",
	        "technique_name": "Exploit Public-Facing Application",
	        "tactic": "Initial Access",
	        "capec": "CAPEC-66",
	        "capec_name": "SQL Injection",
	        "description": "SQL injection allows attackers to manipulate DB queries",
	    },
	    "CWE-78": {
	        "technique_id": "T1059",
	        "technique_name": "Command and Scripting Interpreter",
	        "tactic": "Execution",
	        "capec": "CAPEC-88",
	        "capec_name": "OS Command Injection",
	        "description": "OS command injection via improper input neutralization",
	    },
	    # NOTE(review): shares CAPEC-88 with CWE-78 but uses a different label;
	    # confirm which CAPEC id is intended for generic command injection.
	    "CWE-77": {
	        "technique_id": "T1059",
	        "technique_name": "Command and Scripting Interpreter",
	        "tactic": "Execution",
	        "capec": "CAPEC-88",
	        "capec_name": "Command Injection",
	        "description": "Command injection in shell-invoked functions",
	    },
	    # ── XSS / client-side injection ─────────────────────────
	    # NOTE(review): the CAPEC ids and labels of the two XSS entries below
	    # look mismatched against the CAPEC catalogue titles — confirm.
	    "CWE-79": {
	        "technique_id": "T1059.007",
	        "technique_name": "JavaScript",
	        "tactic": "Execution",
	        "capec": "CAPEC-86",
	        "capec_name": "XSS via HTTP Query Strings",
	        "description": "Cross-site scripting enables malicious script injection",
	    },
	    "CWE-80": {
	        "technique_id": "T1059.007",
	        "technique_name": "JavaScript",
	        "tactic": "Execution",
	        "capec": "CAPEC-198",
	        "capec_name": "XSS via HTTP Headers",
	        "description": "Basic XSS through unescaped HTML",
	    },
	    # ── Path traversal / file operations ────────────────────
	    "CWE-22": {
	        "technique_id": "T1083",
	        "technique_name": "File and Directory Discovery",
	        "tactic": "Discovery",
	        "capec": "CAPEC-126",
	        "capec_name": "Path Traversal",
	        "description": "Path traversal grants unauthorized file access",
	    },
	    "CWE-73": {
	        "technique_id": "T1083",
	        "technique_name": "File and Directory Discovery",
	        "tactic": "Discovery",
	        "capec": "CAPEC-126",
	        "capec_name": "External Control of File Name",
	        "description": "Externally controlled file name leads to traversal",
	    },
	    # ── Authentication / authorization ──────────────────────
	    "CWE-287": {
	        "technique_id": "T1078",
	        "technique_name": "Valid Accounts",
	        "tactic": "Initial Access",
	        "capec": "CAPEC-115",
	        "capec_name": "Authentication Bypass",
	        "description": "Authentication bypass allows unauthorized access",
	    },
	    "CWE-306": {
	        "technique_id": "T1078",
	        "technique_name": "Valid Accounts",
	        "tactic": "Initial Access",
	        "capec": "CAPEC-115",
	        "capec_name": "Missing Authentication",
	        "description": "Missing authentication for critical function",
	    },
	    "CWE-798": {
	        "technique_id": "T1552.001",
	        "technique_name": "Credentials In Files",
	        "tactic": "Credential Access",
	        "capec": "CAPEC-191",
	        "capec_name": "Hardcoded Credentials",
	        "description": "Hardcoded credentials expose authentication secrets",
	    },
	    # ── Sensitive information exposure ──────────────────────
	    "CWE-200": {
	        "technique_id": "T1530",
	        "technique_name": "Data from Cloud Storage",
	        "tactic": "Collection",
	        "capec": "CAPEC-118",
	        "capec_name": "Collect and Analyze Information",
	        "description": "Exposure of sensitive information to unauthorized actors",
	    },
	    "CWE-312": {
	        "technique_id": "T1552.004",
	        "technique_name": "Private Keys",
	        "tactic": "Credential Access",
	        "capec": "CAPEC-37",
	        "capec_name": "Unencrypted Storage",
	        "description": "Cleartext storage of sensitive information",
	    },
	    # ── Serialization / deserialization ─────────────────────
	    "CWE-502": {
	        "technique_id": "T1059",
	        "technique_name": "Command and Scripting Interpreter",
	        "tactic": "Execution",
	        "capec": "CAPEC-586",
	        "capec_name": "Object Injection",
	        "description": "Deserialization of untrusted data enables code execution",
	    },
	    # ── SSRF / request forgery ──────────────────────────────
	    "CWE-918": {
	        "technique_id": "T1090",
	        "technique_name": "Proxy",
	        "tactic": "Command and Control",
	        "capec": "CAPEC-664",
	        "capec_name": "Server-Side Request Forgery",
	        "description": "SSRF allows reaching internal services via the server",
	    },
	    # ── Cache / man-in-the-middle ───────────────────────────
	    # NOTE(review): label says "Man-in-the-Browser" while the description is
	    # about MITM via certificate validation — confirm the CAPEC id/label.
	    "CWE-295": {
	        "technique_id": "T1557",
	        "technique_name": "Adversary-in-the-Middle",
	        "tactic": "Collection",
	        "capec": "CAPEC-94",
	        "capec_name": "Man-in-the-Browser",
	        "description": "Improper certificate validation enables MITM",
	    },
	    # ── Buffer overflow / memory safety ─────────────────────
	    "CWE-120": {
	        "technique_id": "T1203",
	        "technique_name": "Exploitation for Client Execution",
	        "tactic": "Execution",
	        "capec": "CAPEC-100",
	        "capec_name": "Overflow Buffers",
	        "description": "Buffer overflow can lead to arbitrary code execution",
	    },
	    "CWE-119": {
	        "technique_id": "T1203",
	        "technique_name": "Exploitation for Client Execution",
	        "tactic": "Execution",
	        "capec": "CAPEC-100",
	        "capec_name": "Memory Buffer Overflow",
	        "description": "Improper restriction of buffer operations",
	    },
	    # ── Use-after-free / memory management ──────────────────
	    "CWE-416": {
	        "technique_id": "T1203",
	        "technique_name": "Exploitation for Client Execution",
	        "tactic": "Execution",
	        "capec": "CAPEC-46",
	        "capec_name": "Overflow Variables and Tags",
	        "description": "Use-after-free enables heap manipulation attacks",
	    },
	    # ── Supply chain ────────────────────────────────────────
	    "CWE-494": {
	        "technique_id": "T1195.002",
	        "technique_name": "Compromise Software Supply Chain",
	        "tactic": "Initial Access",
	        "capec": "CAPEC-538",
	        "capec_name": "Open-Source Library Manipulation",
	        "description": "Download of code without integrity check",
	    },
	    # ── LDAP injection ──────────────────────────────────────
	    "CWE-90": {
	        "technique_id": "T1190",
	        "technique_name": "Exploit Public-Facing Application",
	        "tactic": "Initial Access",
	        "capec": "CAPEC-136",
	        "capec_name": "LDAP Injection",
	        "description": "LDAP injection via improper LDAP query neutralization",
	    },
	    # ── XXE ─────────────────────────────────────────────────
	    # NOTE(review): confirm that CAPEC-221 is the id that goes with the
	    # "DTD Injection" label.
	    "CWE-611": {
	        "technique_id": "T1190",
	        "technique_name": "Exploit Public-Facing Application",
	        "tactic": "Initial Access",
	        "capec": "CAPEC-221",
	        "capec_name": "DTD Injection",
	        "description": "XML External Entity injection enables file disclosure",
	    },
	    # ── Prototype pollution (Node.js) ───────────────────────
	    "CWE-1321": {
	        "technique_id": "T1059.007",
	        "technique_name": "JavaScript",
	        "tactic": "Execution",
	        "capec": "CAPEC-1",
	        "capec_name": "Accessing Functionality Not Properly Constrained",
	        "description": "Prototype pollution via __proto__ manipulation",
	    },
	    # ── ReDoS ───────────────────────────────────────────────
	    "CWE-1333": {
	        "technique_id": "T1499",
	        "technique_name": "Endpoint Denial of Service",
	        "tactic": "Impact",
	        "capec": "CAPEC-492",
	        "capec_name": "Regular Expression Exponential Blowup",
	        "description": "Inefficient regular expression causes ReDoS",
	    },
	    # ── DoS ─────────────────────────────────────────────────
	    # NOTE(review): an XML-specific CAPEC label on a generic resource
	    # exhaustion CWE — confirm the CAPEC id/label.
	    "CWE-400": {
	        "technique_id": "T1499",
	        "technique_name": "Endpoint Denial of Service",
	        "tactic": "Impact",
	        "capec": "CAPEC-147",
	        "capec_name": "XML Routing Detour Attacks",
	        "description": "Uncontrolled resource consumption leads to DoS",
	    },
	    # ── Open redirect ───────────────────────────────────────
	    "CWE-601": {
	        "technique_id": "T1204.001",
	        "technique_name": "Malicious Link",
	        "tactic": "Execution",
	        "capec": "CAPEC-194",
	        "capec_name": "Fake the Source of Data",
	        "description": "URL redirection to untrusted sites",
	    },
	}

	# Keyword → CWE quick mapping, used to infer a CWE from CVE description text.
	# Keys are lowercase phrases. Insertion order matters: the description
	# lookups in this file walk the dict in order and stop at the first keyword
	# found in the text.
	KEYWORD_TO_CWE: dict[str, str] = {
	    # SQL injection
	    "sql injection": "CWE-89",
	    "sqli": "CWE-89",
	    # OS command injection
	    "command injection": "CWE-78",
	    "os command": "CWE-78",
	    # Cross-site scripting
	    "xss": "CWE-79",
	    "cross-site scripting": "CWE-79",
	    "cross site scripting": "CWE-79",
	    # Path traversal
	    "path traversal": "CWE-22",
	    "directory traversal": "CWE-22",
	    # Authentication / credentials
	    "authentication bypass": "CWE-287",
	    "hardcoded": "CWE-798",
	    "hard-coded": "CWE-798",
	    # SSRF
	    "ssrf": "CWE-918",
	    "server-side request forgery": "CWE-918",
	    # Deserialization / prototype pollution
	    "deserialization": "CWE-502",
	    "prototype pollution": "CWE-1321",
	    # Denial of service
	    "redos": "CWE-1333",
	    "denial of service": "CWE-400",
	    # Redirects / XML
	    "open redirect": "CWE-601",
	    "xxe": "CWE-611",
	    "xml external entity": "CWE-611",
	    # Memory safety
	    "buffer overflow": "CWE-120",
	    "use after free": "CWE-416",
	    "use-after-free": "CWE-416",
	    # Supply chain / LDAP
	    "supply chain": "CWE-494",
	    "ldap injection": "CWE-90",
	}


	def lookup_attck_by_cwe(cwe_id: str) -> dict | None:
	    """Look up the ATT&CK technique mapped to a CWE identifier.

	    Args:
	        cwe_id: CWE identifier such as "CWE-79", "cwe-79" or the bare
	            number "79". Surrounding whitespace and letter case are
	            ignored. Non-string values (e.g. the int 79) are converted
	            with ``str()``.

	    Returns:
	        The matching ``CWE_TO_ATTCK`` entry (technique_id, technique_name,
	        tactic, capec, capec_name, description), or None when ``cwe_id`` is
	        None/blank or has no mapping. The returned dict is the table's own
	        entry, not a copy, so callers should not mutate it.
	    """
	    if cwe_id is None:
	        return None

	    norm = str(cwe_id).strip().upper()
	    if not norm:
	        return None

	    # Accept a bare number by adding the canonical prefix.
	    if not norm.startswith("CWE-"):
	        norm = f"CWE-{norm}"

	    result = CWE_TO_ATTCK.get(norm)
	    if result:
	        logger.info("[ATTCK] CWE %s -> %s (%s)", norm, result["technique_id"], result["technique_name"])
	    return result


	def lookup_attck_by_description(description: str) -> dict | None:
	    """Infer an ATT&CK technique from free-text CVE description.

	    Explicit CWE ids in the text take priority over keyword matching.

	    Args:
	        description: CVE description text; matching is case-insensitive.

	    Returns:
	        The entry returned by ``lookup_attck_by_cwe`` for the first
	        resolvable CWE, or None when the description is empty/None or
	        nothing matches.
	    """
	    if not description:
	        return None

	    text = description.lower()

	    # 1. Explicit CWE ids written in the description (e.g. "CWE-79").
	    #    Every mention is tried in order, so an unmapped first CWE does not
	    #    hide a later one that is in the table.
	    for cwe_number in re.findall(r"cwe-(\d+)", text):
	        result = lookup_attck_by_cwe(f"CWE-{cwe_number}")
	        if result:
	            return result

	    # 2. Keyword match: first KEYWORD_TO_CWE phrase (in table order) that
	    #    occurs as a substring of the lowercased description.
	    for keyword, cwe in KEYWORD_TO_CWE.items():
	        if keyword in text:
	            result = lookup_attck_by_cwe(cwe)
	            if result:
	                return result

	    return None


	def get_attck_for_cve(cve_id: str, description: str = "", cwe_ids: list[str] | None = None) -> dict:
	    """Return the best-matching ATT&CK technique for a CVE.

	    Code-level entry point; per the original author's note it is meant to be
	    called from Intel Fusion's ``_verify_and_recalculate``.

	    Resolution order:
	        1. The first entry of ``cwe_ids`` that has a mapping.
	        2. The first CWE id written in ``description`` that has a mapping.
	        3. The first ``KEYWORD_TO_CWE`` phrase found in ``description``.
	        4. A generic T1190 fallback.

	    Args:
	        cve_id: CVE identifier. Not read by this function; kept as part of
	            the caller-facing signature.
	        description: CVE description text (optional).
	        cwe_ids: Explicit CWE ids for the CVE (optional). Falsy entries are
	            skipped.

	    Returns:
	        A new dict holding the mapped entry's fields plus::

	            "source": "CWE->ATTCK_MAP",
	            "matched_by": <what produced the match>

	        ``matched_by`` is the CWE id as supplied in ``cwe_ids``, or
	        "CWE-<n>" for an id found in the description, or "keyword:<phrase>"
	        for a keyword match, or None for the generic fallback.
	    """

	    def _hit(entry: dict, matched_by: str) -> dict:
	        # Copy the table entry so the shared mapping is never mutated.
	        return {**entry, "source": "CWE->ATTCK_MAP", "matched_by": matched_by}

	    # Prefer the explicit CWE list.
	    for cwe in cwe_ids or ():
	        if not cwe:
	            continue
	        result = lookup_attck_by_cwe(cwe)
	        if result:
	            return _hit(result, cwe)

	    # Fallback: infer from the description. The two stages mirror
	    # lookup_attck_by_description, but are done here so that matched_by
	    # records what actually matched (explicit CWE id vs. keyword).
	    if description:
	        text = description.lower()

	        for cwe_number in re.findall(r"cwe-(\d+)", text):
	            cwe = f"CWE-{cwe_number}"
	            result = lookup_attck_by_cwe(cwe)
	            if result:
	                return _hit(result, cwe)

	        for keyword, cwe in KEYWORD_TO_CWE.items():
	            if keyword in text:
	                result = lookup_attck_by_cwe(cwe)
	                if result:
	                    return _hit(result, f"keyword:{keyword}")

	    # Nothing matched: return the generic T1190 entry (the original author
	    # describes it as the most common exploitation technique).
	    return {
	        "technique_id": "T1190",
	        "technique_name": "Exploit Public-Facing Application",
	        "tactic": "Initial Access",
	        "capec": "CAPEC-1",
	        "capec_name": "Unknown",
	        "description": "No specific ATT&CK mapping found; defaulting to general exploitation",
	        "source": "CWE->ATTCK_MAP",
	        "matched_by": None,
	    }