Threat_Hunter / tools /cwe_registry.py
EricChen2005's picture
Deploy ThreatHunter - AMD MI300X + Qwen2.5-32B
c8d30bc
"""Canonical CWE registry shared by pipeline, Advisor, UI payloads, and tests."""
from __future__ import annotations
from dataclasses import dataclass
from typing import Any
@dataclass(frozen=True)
class CwePatternMeta:
    """Immutable metadata record describing one scanner pattern type.

    Instances are frozen, so they compare by value and can be shared freely
    between the registry and its consumers.
    """

    # CWE identifier string, e.g. "CWE-89".
    cwe_id: str
    # OWASP Top 10 category label, e.g. "A03:2021-Injection".
    owasp_category: str
    # Severity label, e.g. "CRITICAL", "HIGH" or "MEDIUM".
    severity: str
    # Short family tag for the weakness, e.g. "sql_injection".
    weakness_family: str
    # Kind of evidence behind the finding; "code_scan" unless overridden.
    evidence_type: str = "code_scan"
# Fallback metadata handed out when a pattern type is missing or unrecognised.
# Its sentinel id "CWE-unknown" is what pattern_type_to_cwe() compares against.
DEFAULT_PATTERN_META = CwePatternMeta(
    cwe_id="CWE-unknown",
    owasp_category="A03:2021-Injection",
    severity="MEDIUM",
    weakness_family="unknown",
)

# Numeric weight per severity label; a larger number means more severe.
SEVERITY_RANK = {
    "CRITICAL": 4,
    "HIGH": 3,
    "MEDIUM": 2,
    "LOW": 1,
    "UNKNOWN": 0,
}
# CWE id -> short weakness-family tag, listed in ascending CWE-number order.
# Built once at import time rather than on every lookup.
# NOTE(review): CWE-242 (used by the GETS_UNSAFE registry entry) has no row
# here, so it resolves to the "code_weakness" fallback -- confirm this is intended.
_FAMILY_BY_CWE: dict[str, str] = {
    "CWE-22": "path_traversal",
    "CWE-78": "command_injection",
    "CWE-79": "xss",
    "CWE-89": "sql_injection",
    "CWE-90": "ldap_injection",
    "CWE-94": "code_injection",
    "CWE-98": "file_inclusion",
    "CWE-119": "memory_safety",
    "CWE-120": "memory_safety",
    "CWE-134": "format_string",
    "CWE-190": "integer_overflow",
    "CWE-248": "exception_handling",
    "CWE-327": "weak_crypto",
    "CWE-362": "race_condition",
    "CWE-377": "temporary_file",
    "CWE-415": "memory_safety",
    "CWE-416": "memory_safety",
    "CWE-434": "file_upload",
    "CWE-476": "null_dereference",
    "CWE-502": "unsafe_deserialization",
    "CWE-611": "xxe",
    "CWE-798": "hardcoded_secret",
    "CWE-915": "mass_assignment",
    "CWE-917": "expression_injection",
    "CWE-918": "ssrf",
    "CWE-943": "nosql_injection",
    "CWE-1321": "prototype_pollution",
    "CWE-1333": "redos",
}


def _family_for_cwe(cwe_id: str) -> str:
    """Return the weakness-family tag for *cwe_id*.

    The match is exact and case-sensitive. Any id without a row in the
    table yields the generic tag "code_weakness".
    """
    return _FAMILY_BY_CWE.get(cwe_id, "code_weakness")
def _meta(cwe_id: str, owasp_category: str, severity: str) -> CwePatternMeta:
    """Assemble a registry entry, deriving the weakness family from *cwe_id*.

    The evidence type is left at the dataclass default.
    """
    family = _family_for_cwe(cwe_id)
    return CwePatternMeta(cwe_id, owasp_category, severity, weakness_family=family)
# Canonical table mapping a scanner pattern-type name to its CWE metadata.
# Keys are upper-case because get_pattern_meta() upper-cases the name before
# looking it up. Several names may share one CWE (per-language variants and
# aliases). Entries are grouped by CWE id below.
PATTERN_CWE_REGISTRY: dict[str, CwePatternMeta] = {
    # CWE-89: SQL injection.
    "SQL_INJECTION": _meta("CWE-89", "A03:2021-Injection", "CRITICAL"),
    "SQL_CONCAT": _meta("CWE-89", "A03:2021-Injection", "CRITICAL"),
    "SQL_CONCAT_PHP": _meta("CWE-89", "A03:2021-Injection", "CRITICAL"),
    "SQL_FORMAT_RUST": _meta("CWE-89", "A03:2021-Injection", "CRITICAL"),
    "SQL_INJECT_CS": _meta("CWE-89", "A03:2021-Injection", "CRITICAL"),
    "SQL_STATEMENT": _meta("CWE-89", "A03:2021-Injection", "CRITICAL"),
    "TAINT_SUPERGLOBAL": _meta("CWE-89", "A03:2021-Injection", "HIGH"),
    # CWE-78: OS command injection.
    "CMD_INJECTION": _meta("CWE-78", "A03:2021-Injection", "CRITICAL"),
    "COMMAND_INJECTION": _meta("CWE-78", "A03:2021-Injection", "CRITICAL"),
    "CMD_RUST": _meta("CWE-78", "A03:2021-Injection", "CRITICAL"),
    "FFI_SYSTEM": _meta("CWE-78", "A03:2021-Injection", "CRITICAL"),
    "DANGEROUS_ALIAS_PY": _meta("CWE-78", "A03:2021-Injection", "CRITICAL"),
    "SUBPROCESS_SHELL_ALIAS_PY": _meta("CWE-78", "A03:2021-Injection", "CRITICAL"),
    "OPEN_PIPE": _meta("CWE-78", "A03:2021-Injection", "CRITICAL"),
    "SHELL_EXEC": _meta("CWE-78", "A03:2021-Injection", "CRITICAL"),
    "CHILD_PROCESS": _meta("CWE-78", "A03:2021-Injection", "CRITICAL"),
    "CMD_INJECTION_CS": _meta("CWE-78", "A03:2021-Injection", "CRITICAL"),
    "CMD_UNSAFE": _meta("CWE-78", "A03:2021-Injection", "CRITICAL"),
    "CMD_PATTERN": _meta("CWE-78", "A03:2021-Injection", "HIGH"),
    # CWE-79: cross-site scripting.
    "INNERHTML_XSS": _meta("CWE-79", "A03:2021-Injection", "HIGH"),
    "REFLECTED_XSS_JS": _meta("CWE-79", "A03:2021-Injection", "HIGH"),
    "XSS": _meta("CWE-79", "A03:2021-Injection", "HIGH"),
    "XSS_ECHO_PHP": _meta("CWE-79", "A03:2021-Injection", "HIGH"),
    "XSS_CS": _meta("CWE-79", "A03:2021-Injection", "HIGH"),
    "TEMPLATE_UNESCAPED": _meta("CWE-79", "A03:2021-Injection", "HIGH"),
    # CWE-94: code injection (eval/exec style).
    "EVAL_EXEC": _meta("CWE-94", "A03:2021-Injection", "CRITICAL"),
    "EVAL_USAGE": _meta("CWE-94", "A03:2021-Injection", "CRITICAL"),
    "EVAL_INJECTION": _meta("CWE-94", "A03:2021-Injection", "CRITICAL"),
    "CODE_INJECTION": _meta("CWE-94", "A03:2021-Injection", "CRITICAL"),
    # CWE-98: file inclusion.
    "FILE_INCLUDE": _meta("CWE-98", "A03:2021-Injection", "HIGH"),
    "FILE_INCLUSION": _meta("CWE-98", "A03:2021-Injection", "HIGH"),
    # CWE-918: server-side request forgery.
    "SSRF_RISK": _meta("CWE-918", "A10:2021-Server-Side Request Forgery", "HIGH"),
    "SSRF": _meta("CWE-918", "A10:2021-Server-Side Request Forgery", "HIGH"),
    "SSRF_VARIABLE": _meta("CWE-918", "A10:2021-Server-Side Request Forgery", "HIGH"),
    "SSRF_PHP": _meta("CWE-918", "A10:2021-Server-Side Request Forgery", "HIGH"),
    "SSRF_GO": _meta("CWE-918", "A10:2021-Server-Side Request Forgery", "HIGH"),
    "SSRF_JAVA": _meta("CWE-918", "A10:2021-Server-Side Request Forgery", "HIGH"),
    "SSRF_JS": _meta("CWE-918", "A10:2021-Server-Side Request Forgery", "HIGH"),
    # CWE-502: unsafe deserialization.
    "PICKLE_UNSAFE": _meta("CWE-502", "A08:2021-Software and Data Integrity", "CRITICAL"),
    "YAML_UNSAFE": _meta("CWE-502", "A08:2021-Software and Data Integrity", "HIGH"),
    "UNSERIALIZE_PHP": _meta("CWE-502", "A08:2021-Software and Data Integrity", "CRITICAL"),
    "DESERIALIZATION": _meta("CWE-502", "A08:2021-Software and Data Integrity", "CRITICAL"),
    "INSECURE_DESERIALIZATION": _meta("CWE-502", "A08:2021-Software and Data Integrity", "CRITICAL"),
    "DESERIALIZE_UNSAFE": _meta("CWE-502", "A08:2021-Software and Data Integrity", "CRITICAL"),
    "DESERIALIZE_UNSAFE_CS": _meta("CWE-502", "A08:2021-Software and Data Integrity", "CRITICAL"),
    # CWE-798: hard-coded secrets / credentials.
    "HARDCODED_SECRET": _meta("CWE-798", "A07:2021-Identification and Authentication Failures", "HIGH"),
    "HARDCODED_CREDENTIALS": _meta("CWE-798", "A07:2021-Identification and Authentication Failures", "HIGH"),
    # CWE-22: path traversal.
    "PATH_TRAVERSAL": _meta("CWE-22", "A01:2021-Broken Access Control", "HIGH"),
    "PATH_TRAVERSAL_PHP": _meta("CWE-22", "A01:2021-Broken Access Control", "HIGH"),
    "PATH_TRAVERSAL_JAVA": _meta("CWE-22", "A01:2021-Broken Access Control", "HIGH"),
    "PATH_TRAVERSAL_JS": _meta("CWE-22", "A01:2021-Broken Access Control", "HIGH"),
    "PATH_TRAVERSAL_CS": _meta("CWE-22", "A01:2021-Broken Access Control", "HIGH"),
    # CWE-611: XML external entity processing.
    "XXE": _meta("CWE-611", "A05:2021-Security Misconfiguration", "HIGH"),
    "XXE_ENTITY": _meta("CWE-611", "A05:2021-Security Misconfiguration", "HIGH"),
    "XXE_FACTORY": _meta("CWE-611", "A05:2021-Security Misconfiguration", "HIGH"),
    "XXE_PHP": _meta("CWE-611", "A05:2021-Security Misconfiguration", "HIGH"),
    "XXE_CS": _meta("CWE-611", "A05:2021-Security Misconfiguration", "HIGH"),
    "XXE_FAULT": _meta("CWE-611", "A05:2021-Security Misconfiguration", "HIGH"),
    # CWE-90: LDAP injection.
    "LDAP_INJECTION": _meta("CWE-90", "A03:2021-Injection", "HIGH"),
    "LDAP_INJECT_CS": _meta("CWE-90", "A03:2021-Injection", "HIGH"),
    # CWE-1321 prototype pollution and CWE-943 NoSQL injection.
    "PROTOTYPE_POLLUTION": _meta("CWE-1321", "A03:2021-Injection", "CRITICAL"),
    "NOSQL_INJECTION": _meta("CWE-943", "A03:2021-Injection", "HIGH"),
    # CWE-917: expression injection (both logging-related patterns map here).
    "LOG4SHELL_JNDI": _meta("CWE-917", "A09:2021-Security Logging and Monitoring", "CRITICAL"),
    "LOG_INJECTION_JAVA": _meta("CWE-917", "A09:2021-Security Logging and Monitoring", "CRITICAL"),
    # Native-code weaknesses: CWE-120, CWE-242, CWE-415, CWE-190, CWE-377, CWE-134.
    # CWE-242 has no row in _family_for_cwe, so GETS_UNSAFE gets the fallback family.
    "BUFFER_OVERFLOW": _meta("CWE-120", "A06:2021-Vulnerable Components", "CRITICAL"),
    "GETS_UNSAFE": _meta("CWE-242", "A06:2021-Vulnerable Components", "HIGH"),
    "DOUBLE_FREE_C": _meta("CWE-415", "A06:2021-Vulnerable Components", "CRITICAL"),
    "INTEGER_OVERFLOW_C": _meta("CWE-190", "A06:2021-Vulnerable Components", "HIGH"),
    "TMPNAM_UNSAFE": _meta("CWE-377", "A06:2021-Vulnerable Components", "HIGH"),
    "FORMAT_STRING": _meta("CWE-134", "A03:2021-Injection", "HIGH"),
    # CWE-119 / CWE-476 / CWE-416: memory safety, null dereference, use-after-free.
    "UNSAFE_BLOCK": _meta("CWE-119", "A06:2021-Vulnerable Components", "HIGH"),
    "RAW_PTR": _meta("CWE-119", "A06:2021-Vulnerable Components", "HIGH"),
    "RAW_PTR_WRITE_RUST": _meta("CWE-119", "A06:2021-Vulnerable Components", "CRITICAL"),
    "OUT_OF_BOUNDS_PTR_RUST": _meta("CWE-119", "A06:2021-Vulnerable Components", "CRITICAL"),
    "NULL_PTR_RUST": _meta("CWE-476", "A06:2021-Vulnerable Components", "HIGH"),
    "NULL_DEREF_RUST": _meta("CWE-476", "A06:2021-Vulnerable Components", "HIGH"),
    "USE_AFTER_FREE": _meta("CWE-416", "A06:2021-Vulnerable Components", "CRITICAL"),
    "UAF_RUST": _meta("CWE-416", "A06:2021-Vulnerable Components", "CRITICAL"),
    "UAF_RUST_DEREF": _meta("CWE-416", "A06:2021-Vulnerable Components", "CRITICAL"),
    # CWE-248: exception handling (unwrap-style patterns).
    "UNWRAP_PANIC": _meta("CWE-248", "A05:2021-Security Misconfiguration", "MEDIUM"),
    "UNTRUSTED_UNWRAP_RUST": _meta("CWE-248", "A05:2021-Security Misconfiguration", "HIGH"),
    # Remaining single-CWE entries: CWE-434, CWE-327, CWE-362, CWE-915, CWE-1333.
    "UPLOAD_PHP": _meta("CWE-434", "A05:2021-Security Misconfiguration", "HIGH"),
    "CRYPTO_WEAK": _meta("CWE-327", "A02:2021-Cryptographic Failures", "HIGH"),
    "RACE_CONDITION_GO": _meta("CWE-362", "A04:2021-Insecure Design", "HIGH"),
    "MASS_ASSIGNMENT_JS": _meta("CWE-915", "A01:2021-Broken Access Control", "HIGH"),
    "REDOS_JS": _meta("CWE-1333", "A06:2021-Vulnerable Components", "MEDIUM"),
    "REDOS": _meta("CWE-1333", "A06:2021-Vulnerable Components", "MEDIUM"),
}
def severity_rank(severity: str | None) -> int:
    """Translate a severity label into its numeric rank.

    The label is matched case-insensitively against SEVERITY_RANK. A missing
    or empty value is treated as "UNKNOWN", and any label not in the table
    ranks 0.
    """
    label = str(severity).upper() if severity else "UNKNOWN"
    return SEVERITY_RANK.get(label, 0)
def get_pattern_meta(pattern_type: str | None) -> CwePatternMeta:
    """Look up the registry metadata for a scanner pattern type.

    The name is upper-cased before the lookup. A missing, empty or
    unregistered pattern type yields DEFAULT_PATTERN_META.
    """
    if pattern_type:
        try:
            return PATTERN_CWE_REGISTRY[str(pattern_type).upper()]
        except KeyError:
            pass
    return DEFAULT_PATTERN_META
def pattern_type_to_cwe(pattern_type: str | None) -> str | None:
    """Return the CWE id registered for *pattern_type*, or None if unknown.

    "Unknown" means the lookup produced the default metadata's sentinel id.
    """
    resolved_id = get_pattern_meta(pattern_type).cwe_id
    is_known = resolved_id != DEFAULT_PATTERN_META.cwe_id
    return resolved_id if is_known else None
def build_cwe_reference(cwe_id: str | None) -> dict[str, Any] | None:
if not cwe_id or not str(cwe_id).startswith("CWE-"):
return None
normalized = str(cwe_id).upper()
try:
from tools.cwe_database import get_cwe_info
except ImportError:
info = None
else:
info = get_cwe_info(normalized)
if not info:
return {
"id": normalized,
"name": normalized,
"source": "ThreatHunter CWE registry",
"nist_severity": "UNKNOWN",
"cvss_base": None,
"owasp_2021": "",
"cwe_url": f"https://cwe.mitre.org/data/definitions/{normalized.replace('CWE-', '')}.html",
"description": "",
"remediation_zh": "",
"representative_cves": [],
"disclaimer": (
"代表性 CVE 為同類弱點的真實被利用案例,"
"非本程式碼的直接 CVE 識別碼。"
"用於說明此類弱點的風險嚴重性。"
),
}
return {
"id": normalized,
"name": info.get("name", normalized),
"source": info.get("source", "MITRE CWE v4.14"),
"nist_severity": info.get("nist_severity", "UNKNOWN"),
"cvss_base": info.get("cvss_base", None),
"owasp_2021": info.get("owasp_2021", ""),
"cwe_url": info.get("cwe_url", f"https://cwe.mitre.org/data/definitions/{normalized.replace('CWE-', '')}.html"),
"description": info.get("description", "")[:300],
"remediation_zh": info.get("remediation_zh", info.get("remediation_en", "")),
"representative_cves": info.get("representative_cves", [])[:3],
"disclaimer": (
"代表性 CVE 為同類弱點的真實被利用案例,"
"非本程式碼的直接 CVE 識別碼。"
"用於說明此類弱點的風險嚴重性。"
),
}
def registry_snapshot() -> dict[str, dict[str, str]]:
    """Return a plain-dict copy of the registry, ordered by pattern-type name.

    Each value holds the five metadata fields as strings, so the result
    contains no dataclass instances.
    """
    snapshot: dict[str, dict[str, str]] = {}
    for name in sorted(PATTERN_CWE_REGISTRY):
        entry = PATTERN_CWE_REGISTRY[name]
        snapshot[name] = {
            "cwe_id": entry.cwe_id,
            "owasp_category": entry.owasp_category,
            "severity": entry.severity,
            "weakness_family": entry.weakness_family,
            "evidence_type": entry.evidence_type,
        }
    return snapshot