Spaces:
Sleeping
Sleeping
Update header_analyzer.py
Browse files - header_analyzer.py +25 -39
header_analyzer.py
CHANGED
|
@@ -5,7 +5,6 @@ import whois
|
|
| 5 |
from datetime import datetime
|
| 6 |
from email.utils import parseaddr
|
| 7 |
|
| 8 |
-
# Known brands (expandable)
|
| 9 |
BRAND_OFFICIAL = {
|
| 10 |
"paypal": ["paypal.com"],
|
| 11 |
"amazon": ["amazon.com"],
|
|
@@ -15,7 +14,6 @@ BRAND_OFFICIAL = {
|
|
| 15 |
}
|
| 16 |
|
| 17 |
SUSPICIOUS_TLDS = {"xyz", "top", "click", "work", "loan", "tk", "zip", "mov"}
|
| 18 |
-
|
| 19 |
FREE_EMAIL_PROVIDERS = {
|
| 20 |
"gmail.com", "yahoo.com", "outlook.com", "hotmail.com", "icloud.com"
|
| 21 |
}
|
|
@@ -41,49 +39,52 @@ def _domain_age_days(domain: str):
|
|
| 41 |
|
| 42 |
|
| 43 |
def analyze_headers(headers: dict, body: str = ""):
|
| 44 |
-
"""
|
| 45 |
-
Returns:
|
| 46 |
-
findings: list[str]
|
| 47 |
-
score: int
|
| 48 |
-
auth_summary: str
|
| 49 |
-
"""
|
| 50 |
findings = []
|
| 51 |
score = 0
|
| 52 |
headers = headers or {}
|
| 53 |
body_l = (body or "").lower()
|
| 54 |
|
| 55 |
-
# =========================
|
| 56 |
-
# AUTHENTICATION RESULTS
|
| 57 |
-
# =========================
|
| 58 |
auth_header = (
|
| 59 |
headers.get("Authentication-Results")
|
| 60 |
or headers.get("Authentication-results")
|
| 61 |
or ""
|
| 62 |
).lower()
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
auth_summary = []
|
| 65 |
|
| 66 |
if "spf=fail" in auth_header:
|
| 67 |
findings.append("Header: SPF authentication failed")
|
|
|
|
| 68 |
auth_summary.append("SPF failed")
|
| 69 |
score += 25
|
|
|
|
|
|
|
| 70 |
|
| 71 |
if "dkim=fail" in auth_header or "dkim=permerror" in auth_header:
|
| 72 |
findings.append("Header: DKIM authentication failed")
|
|
|
|
| 73 |
auth_summary.append("DKIM failed")
|
| 74 |
score += 25
|
|
|
|
|
|
|
| 75 |
|
| 76 |
if "dmarc=fail" in auth_header:
|
| 77 |
findings.append("Header: DMARC authentication failed")
|
|
|
|
| 78 |
auth_summary.append("DMARC failed")
|
| 79 |
score += 30
|
|
|
|
|
|
|
| 80 |
|
| 81 |
if not auth_summary:
|
| 82 |
auth_summary.append("No strong authentication failures detected")
|
| 83 |
|
| 84 |
-
# =========================
|
| 85 |
-
# FROM / REPLY-TO ANALYSIS
|
| 86 |
-
# =========================
|
| 87 |
from_domain = _extract_domain(headers.get("From", ""))
|
| 88 |
reply_domain = _extract_domain(headers.get("Reply-To", ""))
|
| 89 |
|
|
@@ -93,47 +94,29 @@ def analyze_headers(headers: dict, body: str = ""):
|
|
| 93 |
)
|
| 94 |
score += 35
|
| 95 |
|
| 96 |
-
|
| 97 |
-
# BEC INDICATORS
|
| 98 |
-
# =========================
|
| 99 |
-
if from_domain and from_domain in FREE_EMAIL_PROVIDERS:
|
| 100 |
findings.append(f"Header: Free email provider used ({from_domain})")
|
| 101 |
score += 15
|
| 102 |
|
| 103 |
if any(k.lower() in headers for k in ["bcc", "cc"]) and not headers.get("To"):
|
| 104 |
-
findings.append("Header: Possible BEC —
|
| 105 |
score += 20
|
| 106 |
|
| 107 |
-
if body_l
|
| 108 |
-
|
| 109 |
-
):
|
| 110 |
-
findings.append("Header/Body: Financial request detected (BEC pattern)")
|
| 111 |
score += 35
|
| 112 |
|
| 113 |
-
# =========================
|
| 114 |
-
# DOMAIN REPUTATION
|
| 115 |
-
# =========================
|
| 116 |
if from_domain:
|
| 117 |
tld = from_domain.split(".")[-1]
|
| 118 |
-
|
| 119 |
if tld in SUSPICIOUS_TLDS:
|
| 120 |
findings.append(f"Header: Suspicious TLD used ({tld})")
|
| 121 |
score += 20
|
| 122 |
|
| 123 |
age = _domain_age_days(from_domain)
|
| 124 |
if age is not None and age < 90:
|
| 125 |
-
findings.append(
|
| 126 |
-
f"Header: Sender domain is very new ({age} days old)"
|
| 127 |
-
)
|
| 128 |
score += 30
|
| 129 |
|
| 130 |
-
if len(from_domain.split(".")) > 4:
|
| 131 |
-
findings.append("Header: Excessive subdomains detected")
|
| 132 |
-
score += 15
|
| 133 |
-
|
| 134 |
-
# =========================
|
| 135 |
-
# BRAND / LOOK-ALIKE SPOOFING
|
| 136 |
-
# =========================
|
| 137 |
for brand, legit_domains in BRAND_OFFICIAL.items():
|
| 138 |
if brand in from_domain:
|
| 139 |
if not any(from_domain.endswith(ld) for ld in legit_domains):
|
|
@@ -150,6 +133,9 @@ def analyze_headers(headers: dict, body: str = ""):
|
|
| 150 |
)
|
| 151 |
score += 40
|
| 152 |
|
| 153 |
-
score =
|
| 154 |
|
| 155 |
-
return findings, score,
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
from datetime import datetime
|
| 6 |
from email.utils import parseaddr
|
| 7 |
|
|
|
|
| 8 |
BRAND_OFFICIAL = {
|
| 9 |
"paypal": ["paypal.com"],
|
| 10 |
"amazon": ["amazon.com"],
|
|
|
|
| 14 |
}
|
| 15 |
|
| 16 |
SUSPICIOUS_TLDS = {"xyz", "top", "click", "work", "loan", "tk", "zip", "mov"}
|
|
|
|
| 17 |
FREE_EMAIL_PROVIDERS = {
|
| 18 |
"gmail.com", "yahoo.com", "outlook.com", "hotmail.com", "icloud.com"
|
| 19 |
}
|
|
|
|
| 39 |
|
| 40 |
|
| 41 |
def analyze_headers(headers: dict, body: str = ""):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
findings = []
|
| 43 |
score = 0
|
| 44 |
headers = headers or {}
|
| 45 |
body_l = (body or "").lower()
|
| 46 |
|
|
|
|
|
|
|
|
|
|
| 47 |
auth_header = (
|
| 48 |
headers.get("Authentication-Results")
|
| 49 |
or headers.get("Authentication-results")
|
| 50 |
or ""
|
| 51 |
).lower()
|
| 52 |
|
| 53 |
+
auth_results = {
|
| 54 |
+
"spf": "unknown",
|
| 55 |
+
"dkim": "unknown",
|
| 56 |
+
"dmarc": "unknown",
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
auth_summary = []
|
| 60 |
|
| 61 |
if "spf=fail" in auth_header:
|
| 62 |
findings.append("Header: SPF authentication failed")
|
| 63 |
+
auth_results["spf"] = "fail"
|
| 64 |
auth_summary.append("SPF failed")
|
| 65 |
score += 25
|
| 66 |
+
elif "spf=pass" in auth_header:
|
| 67 |
+
auth_results["spf"] = "pass"
|
| 68 |
|
| 69 |
if "dkim=fail" in auth_header or "dkim=permerror" in auth_header:
|
| 70 |
findings.append("Header: DKIM authentication failed")
|
| 71 |
+
auth_results["dkim"] = "fail"
|
| 72 |
auth_summary.append("DKIM failed")
|
| 73 |
score += 25
|
| 74 |
+
elif "dkim=pass" in auth_header:
|
| 75 |
+
auth_results["dkim"] = "pass"
|
| 76 |
|
| 77 |
if "dmarc=fail" in auth_header:
|
| 78 |
findings.append("Header: DMARC authentication failed")
|
| 79 |
+
auth_results["dmarc"] = "fail"
|
| 80 |
auth_summary.append("DMARC failed")
|
| 81 |
score += 30
|
| 82 |
+
elif "dmarc=pass" in auth_header:
|
| 83 |
+
auth_results["dmarc"] = "pass"
|
| 84 |
|
| 85 |
if not auth_summary:
|
| 86 |
auth_summary.append("No strong authentication failures detected")
|
| 87 |
|
|
|
|
|
|
|
|
|
|
| 88 |
from_domain = _extract_domain(headers.get("From", ""))
|
| 89 |
reply_domain = _extract_domain(headers.get("Reply-To", ""))
|
| 90 |
|
|
|
|
| 94 |
)
|
| 95 |
score += 35
|
| 96 |
|
| 97 |
+
if from_domain in FREE_EMAIL_PROVIDERS:
|
|
|
|
|
|
|
|
|
|
| 98 |
findings.append(f"Header: Free email provider used ({from_domain})")
|
| 99 |
score += 15
|
| 100 |
|
| 101 |
if any(k.lower() in headers for k in ["bcc", "cc"]) and not headers.get("To"):
|
| 102 |
+
findings.append("Header: Possible BEC — CC/BCC without To header")
|
| 103 |
score += 20
|
| 104 |
|
| 105 |
+
if any(x in body_l for x in ["wire transfer", "urgent payment", "bank details"]):
|
| 106 |
+
findings.append("Header/Body: Financial request pattern (BEC)")
|
|
|
|
|
|
|
| 107 |
score += 35
|
| 108 |
|
|
|
|
|
|
|
|
|
|
| 109 |
if from_domain:
|
| 110 |
tld = from_domain.split(".")[-1]
|
|
|
|
| 111 |
if tld in SUSPICIOUS_TLDS:
|
| 112 |
findings.append(f"Header: Suspicious TLD used ({tld})")
|
| 113 |
score += 20
|
| 114 |
|
| 115 |
age = _domain_age_days(from_domain)
|
| 116 |
if age is not None and age < 90:
|
| 117 |
+
findings.append(f"Header: Sender domain very new ({age} days)")
|
|
|
|
|
|
|
| 118 |
score += 30
|
| 119 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
for brand, legit_domains in BRAND_OFFICIAL.items():
|
| 121 |
if brand in from_domain:
|
| 122 |
if not any(from_domain.endswith(ld) for ld in legit_domains):
|
|
|
|
| 133 |
)
|
| 134 |
score += 40
|
| 135 |
|
| 136 |
+
score = min(score, 100)
|
| 137 |
|
| 138 |
+
return findings, score, {
|
| 139 |
+
"summary": ", ".join(auth_summary),
|
| 140 |
+
"results": auth_results,
|
| 141 |
+
}
|