princemaxp committed on
Commit
9431f5e
·
verified ·
1 Parent(s): 271bd39

Update header_analyzer.py

Browse files
Files changed (1) hide show
  1. header_analyzer.py +25 -39
header_analyzer.py CHANGED
@@ -5,7 +5,6 @@ import whois
5
  from datetime import datetime
6
  from email.utils import parseaddr
7
 
8
- # Known brands (expandable)
9
  BRAND_OFFICIAL = {
10
  "paypal": ["paypal.com"],
11
  "amazon": ["amazon.com"],
@@ -15,7 +14,6 @@ BRAND_OFFICIAL = {
15
  }
16
 
17
  SUSPICIOUS_TLDS = {"xyz", "top", "click", "work", "loan", "tk", "zip", "mov"}
18
-
19
  FREE_EMAIL_PROVIDERS = {
20
  "gmail.com", "yahoo.com", "outlook.com", "hotmail.com", "icloud.com"
21
  }
@@ -41,49 +39,52 @@ def _domain_age_days(domain: str):
41
 
42
 
43
  def analyze_headers(headers: dict, body: str = ""):
44
- """
45
- Returns:
46
- findings: list[str]
47
- score: int
48
- auth_summary: str
49
- """
50
  findings = []
51
  score = 0
52
  headers = headers or {}
53
  body_l = (body or "").lower()
54
 
55
- # =========================
56
- # AUTHENTICATION RESULTS
57
- # =========================
58
  auth_header = (
59
  headers.get("Authentication-Results")
60
  or headers.get("Authentication-results")
61
  or ""
62
  ).lower()
63
 
 
 
 
 
 
 
64
  auth_summary = []
65
 
66
  if "spf=fail" in auth_header:
67
  findings.append("Header: SPF authentication failed")
 
68
  auth_summary.append("SPF failed")
69
  score += 25
 
 
70
 
71
  if "dkim=fail" in auth_header or "dkim=permerror" in auth_header:
72
  findings.append("Header: DKIM authentication failed")
 
73
  auth_summary.append("DKIM failed")
74
  score += 25
 
 
75
 
76
  if "dmarc=fail" in auth_header:
77
  findings.append("Header: DMARC authentication failed")
 
78
  auth_summary.append("DMARC failed")
79
  score += 30
 
 
80
 
81
  if not auth_summary:
82
  auth_summary.append("No strong authentication failures detected")
83
 
84
- # =========================
85
- # FROM / REPLY-TO ANALYSIS
86
- # =========================
87
  from_domain = _extract_domain(headers.get("From", ""))
88
  reply_domain = _extract_domain(headers.get("Reply-To", ""))
89
 
@@ -93,47 +94,29 @@ def analyze_headers(headers: dict, body: str = ""):
93
  )
94
  score += 35
95
 
96
- # =========================
97
- # BEC INDICATORS
98
- # =========================
99
- if from_domain and from_domain in FREE_EMAIL_PROVIDERS:
100
  findings.append(f"Header: Free email provider used ({from_domain})")
101
  score += 15
102
 
103
  if any(k.lower() in headers for k in ["bcc", "cc"]) and not headers.get("To"):
104
- findings.append("Header: Possible BEC — missing To field with CC/BCC usage")
105
  score += 20
106
 
107
- if body_l and any(
108
- x in body_l for x in ["wire transfer", "urgent payment", "bank details"]
109
- ):
110
- findings.append("Header/Body: Financial request detected (BEC pattern)")
111
  score += 35
112
 
113
- # =========================
114
- # DOMAIN REPUTATION
115
- # =========================
116
  if from_domain:
117
  tld = from_domain.split(".")[-1]
118
-
119
  if tld in SUSPICIOUS_TLDS:
120
  findings.append(f"Header: Suspicious TLD used ({tld})")
121
  score += 20
122
 
123
  age = _domain_age_days(from_domain)
124
  if age is not None and age < 90:
125
- findings.append(
126
- f"Header: Sender domain is very new ({age} days old)"
127
- )
128
  score += 30
129
 
130
- if len(from_domain.split(".")) > 4:
131
- findings.append("Header: Excessive subdomains detected")
132
- score += 15
133
-
134
- # =========================
135
- # BRAND / LOOK-ALIKE SPOOFING
136
- # =========================
137
  for brand, legit_domains in BRAND_OFFICIAL.items():
138
  if brand in from_domain:
139
  if not any(from_domain.endswith(ld) for ld in legit_domains):
@@ -150,6 +133,9 @@ def analyze_headers(headers: dict, body: str = ""):
150
  )
151
  score += 40
152
 
153
- score = int(min(score, 100))
154
 
155
- return findings, score, ", ".join(auth_summary)
 
 
 
 
5
  from datetime import datetime
6
  from email.utils import parseaddr
7
 
 
8
  BRAND_OFFICIAL = {
9
  "paypal": ["paypal.com"],
10
  "amazon": ["amazon.com"],
 
14
  }
15
 
16
  SUSPICIOUS_TLDS = {"xyz", "top", "click", "work", "loan", "tk", "zip", "mov"}
 
17
  FREE_EMAIL_PROVIDERS = {
18
  "gmail.com", "yahoo.com", "outlook.com", "hotmail.com", "icloud.com"
19
  }
 
39
 
40
 
41
  def analyze_headers(headers: dict, body: str = ""):
 
 
 
 
 
 
42
  findings = []
43
  score = 0
44
  headers = headers or {}
45
  body_l = (body or "").lower()
46
 
 
 
 
47
  auth_header = (
48
  headers.get("Authentication-Results")
49
  or headers.get("Authentication-results")
50
  or ""
51
  ).lower()
52
 
53
+ auth_results = {
54
+ "spf": "unknown",
55
+ "dkim": "unknown",
56
+ "dmarc": "unknown",
57
+ }
58
+
59
  auth_summary = []
60
 
61
  if "spf=fail" in auth_header:
62
  findings.append("Header: SPF authentication failed")
63
+ auth_results["spf"] = "fail"
64
  auth_summary.append("SPF failed")
65
  score += 25
66
+ elif "spf=pass" in auth_header:
67
+ auth_results["spf"] = "pass"
68
 
69
  if "dkim=fail" in auth_header or "dkim=permerror" in auth_header:
70
  findings.append("Header: DKIM authentication failed")
71
+ auth_results["dkim"] = "fail"
72
  auth_summary.append("DKIM failed")
73
  score += 25
74
+ elif "dkim=pass" in auth_header:
75
+ auth_results["dkim"] = "pass"
76
 
77
  if "dmarc=fail" in auth_header:
78
  findings.append("Header: DMARC authentication failed")
79
+ auth_results["dmarc"] = "fail"
80
  auth_summary.append("DMARC failed")
81
  score += 30
82
+ elif "dmarc=pass" in auth_header:
83
+ auth_results["dmarc"] = "pass"
84
 
85
  if not auth_summary:
86
  auth_summary.append("No strong authentication failures detected")
87
 
 
 
 
88
  from_domain = _extract_domain(headers.get("From", ""))
89
  reply_domain = _extract_domain(headers.get("Reply-To", ""))
90
 
 
94
  )
95
  score += 35
96
 
97
+ if from_domain in FREE_EMAIL_PROVIDERS:
 
 
 
98
  findings.append(f"Header: Free email provider used ({from_domain})")
99
  score += 15
100
 
101
  if any(k.lower() in headers for k in ["bcc", "cc"]) and not headers.get("To"):
102
+ findings.append("Header: Possible BEC — CC/BCC without To header")
103
  score += 20
104
 
105
+ if any(x in body_l for x in ["wire transfer", "urgent payment", "bank details"]):
106
+ findings.append("Header/Body: Financial request pattern (BEC)")
 
 
107
  score += 35
108
 
 
 
 
109
  if from_domain:
110
  tld = from_domain.split(".")[-1]
 
111
  if tld in SUSPICIOUS_TLDS:
112
  findings.append(f"Header: Suspicious TLD used ({tld})")
113
  score += 20
114
 
115
  age = _domain_age_days(from_domain)
116
  if age is not None and age < 90:
117
+ findings.append(f"Header: Sender domain very new ({age} days)")
 
 
118
  score += 30
119
 
 
 
 
 
 
 
 
120
  for brand, legit_domains in BRAND_OFFICIAL.items():
121
  if brand in from_domain:
122
  if not any(from_domain.endswith(ld) for ld in legit_domains):
 
133
  )
134
  score += 40
135
 
136
+ score = min(score, 100)
137
 
138
+ return findings, score, {
139
+ "summary": ", ".join(auth_summary),
140
+ "results": auth_results,
141
+ }