Spaces:
Sleeping
Sleeping
Ankit19102004 committed on
Commit ·
93621ed
1
Parent(s): cdb0ab8
Update honeypot_api and README
Browse files - honeypot_api.py +9 -3
honeypot_api.py
CHANGED
|
@@ -84,14 +84,14 @@ def extract_intelligence(text):
|
|
| 84 |
|
| 85 |
patterns = {
|
| 86 |
"bankAccounts": r"\b\d{12,18}\b",
|
| 87 |
-
"phoneNumbers": r"(\+?\d{1,3}[- ]?)?\d{10}",
|
| 88 |
"emailAddresses": r"[a-zA-Z0-9.\-_+]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]+",
|
| 89 |
"phishingLinks": r"https?://[^\s]+",
|
| 90 |
"upiIds": r"[a-zA-Z0-9.\-_+]+@[a-zA-Z]+",
|
| 91 |
"cardNumbers": r"\b(?:\d{4}[- ]?){3}\d{4}\b",
|
| 92 |
"ifscCodes": r"\b[A-Z]{4}0[A-Z0-9]{6}\b",
|
| 93 |
"transactionIds": r"\b[A-Z0-9]{8,20}\b",
|
| 94 |
-
"caseIds": r"\b(?:CASE|CAS|REF|ID|TICKET)[- ]?[A-Z0-9]{4,}\b",
|
| 95 |
"policyNumbers": r"\b(?:POLICY|POL|PL|INS)[- ]?[A-Z0-9]{4,}\b",
|
| 96 |
"orderNumbers": r"\b(?:ORDER|ORD|OD)[- ]?[A-Z0-9]{4,}\b",
|
| 97 |
"telegramHandles": r"@[a-zA-Z0-9_]{5,}",
|
|
@@ -122,10 +122,16 @@ def extract_intelligence(text):
|
|
| 122 |
if key in ["cardNumbers", "transactionIds", "policyNumbers", "orderNumbers"]:
|
| 123 |
extracted["bankAccounts"].extend(matches)
|
| 124 |
|
| 125 |
-
# Deduplicate final lists
|
| 126 |
for k in extracted:
|
| 127 |
extracted[k] = list(set(extracted[k]))
|
| 128 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
return extracted
|
| 130 |
|
| 131 |
# ============================
|
|
|
|
| 84 |
|
| 85 |
patterns = {
|
| 86 |
"bankAccounts": r"\b\d{12,18}\b",
|
| 87 |
+
"phoneNumbers": r"(?:\+?\d{1,3}[- ]?)?\d{10}\b",
|
| 88 |
"emailAddresses": r"[a-zA-Z0-9.\-_+]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]+",
|
| 89 |
"phishingLinks": r"https?://[^\s]+",
|
| 90 |
"upiIds": r"[a-zA-Z0-9.\-_+]+@[a-zA-Z]+",
|
| 91 |
"cardNumbers": r"\b(?:\d{4}[- ]?){3}\d{4}\b",
|
| 92 |
"ifscCodes": r"\b[A-Z]{4}0[A-Z0-9]{6}\b",
|
| 93 |
"transactionIds": r"\b[A-Z0-9]{8,20}\b",
|
| 94 |
+
"caseIds": r"(?:\b(?:CASE|CAS|REF|ID|TICKET)[- ]?[A-Z0-9]{4,}\b|\bC\d{4,}\b)",
|
| 95 |
"policyNumbers": r"\b(?:POLICY|POL|PL|INS)[- ]?[A-Z0-9]{4,}\b",
|
| 96 |
"orderNumbers": r"\b(?:ORDER|ORD|OD)[- ]?[A-Z0-9]{4,}\b",
|
| 97 |
"telegramHandles": r"@[a-zA-Z0-9_]{5,}",
|
|
|
|
| 122 |
if key in ["cardNumbers", "transactionIds", "policyNumbers", "orderNumbers"]:
|
| 123 |
extracted["bankAccounts"].extend(matches)
|
| 124 |
|
|
|
|
| 125 |
for k in extracted:
|
| 126 |
extracted[k] = list(set(extracted[k]))
|
| 127 |
|
| 128 |
+
clean_bank = []
|
| 129 |
+
for acc in extracted["bankAccounts"]:
|
| 130 |
+
digits = re.sub(r"\D", "", acc)
|
| 131 |
+
if 12 <= len(digits) <= 18:
|
| 132 |
+
clean_bank.append(digits)
|
| 133 |
+
extracted["bankAccounts"] = list(set(clean_bank))
|
| 134 |
+
|
| 135 |
return extracted
|
| 136 |
|
| 137 |
# ============================
|