Spaces:
Sleeping
Sleeping
Ankit19102004 committed on
Commit ·
3b5b2b6
1
Parent(s): 3891fd2
initial
Browse files - honeypot_api.py +35 -8
honeypot_api.py
CHANGED
|
@@ -106,7 +106,7 @@ def extract_intelligence(text):
|
|
| 106 |
"orderNumbers": [],
|
| 107 |
}
|
| 108 |
|
| 109 |
-
# Strict Indian phone numbers only (+91-XXXXXXXXXX)
|
| 110 |
phones = re.findall(r"\+91[- ]?\d{10}\b", text)
|
| 111 |
extracted["phoneNumbers"] = list(set(phones))
|
| 112 |
|
|
@@ -121,20 +121,40 @@ def extract_intelligence(text):
|
|
| 121 |
)
|
| 122 |
extracted["emailAddresses"] = list(set(emails))
|
| 123 |
|
| 124 |
-
#
|
| 125 |
-
|
|
|
|
|
|
|
|
|
|
| 126 |
clean_upi = []
|
| 127 |
-
for u in
|
| 128 |
-
|
| 129 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
extracted["upiIds"] = list(set(clean_upi))
|
| 131 |
|
|
|
|
| 132 |
# Phishing links
|
|
|
|
| 133 |
links = re.findall(r"https?://[^\s]+", text)
|
| 134 |
clean_links = [l.rstrip(".,)") for l in links]
|
| 135 |
extracted["phishingLinks"] = list(set(clean_links))
|
| 136 |
|
|
|
|
| 137 |
# Case IDs (REF, CASE, ID)
|
|
|
|
| 138 |
case_ids = re.findall(
|
| 139 |
r"\b(?:REF|CASE|ID)[- ]?\d+(?:-\d+)*\b",
|
| 140 |
text,
|
|
@@ -150,7 +170,9 @@ def extract_intelligence(text):
|
|
| 150 |
|
| 151 |
extracted["caseIds"] = list(set(case_ids + emp_ids))
|
| 152 |
|
|
|
|
| 153 |
# Policy numbers
|
|
|
|
| 154 |
policies = re.findall(
|
| 155 |
r"\bPOL[- ]?\d+(?:-\d+)*\b",
|
| 156 |
text,
|
|
@@ -158,7 +180,9 @@ def extract_intelligence(text):
|
|
| 158 |
)
|
| 159 |
extracted["policyNumbers"] = list(set(policies))
|
| 160 |
|
|
|
|
| 161 |
# Transaction / Order IDs
|
|
|
|
| 162 |
txns = re.findall(
|
| 163 |
r"\b(?:TXN|ORDER|ORD)[- ]?\d+(?:-\d+)*\b",
|
| 164 |
text,
|
|
@@ -264,7 +288,10 @@ def send_final_output(session_id):
|
|
| 264 |
conv = conversation_store[session_id]
|
| 265 |
intel = intelligence_store[session_id]
|
| 266 |
|
| 267 |
-
duration_seconds =
|
|
|
|
|
|
|
|
|
|
| 268 |
|
| 269 |
payload = {
|
| 270 |
"sessionId": session_id,
|
|
@@ -344,4 +371,4 @@ def honeypot_message():
|
|
| 344 |
|
| 345 |
if __name__ == "__main__":
|
| 346 |
port = int(os.getenv("PORT", "8000"))
|
| 347 |
-
app.run(host="0.0.0.0", port=port)
|
|
|
|
| 106 |
"orderNumbers": [],
|
| 107 |
}
|
| 108 |
|
| 109 |
+
# Strict Indian phone numbers only (+91XXXXXXXXXX or +91-XXXXXXXXXX)
|
| 110 |
phones = re.findall(r"\+91[- ]?\d{10}\b", text)
|
| 111 |
extracted["phoneNumbers"] = list(set(phones))
|
| 112 |
|
|
|
|
| 121 |
)
|
| 122 |
extracted["emailAddresses"] = list(set(emails))
|
| 123 |
|
| 124 |
+
# =========================
|
| 125 |
+
# UPI IDs (strict format: no dot in domain)
|
| 126 |
+
# =========================
|
| 127 |
+
upi_matches = re.findall(r"\b[a-zA-Z0-9._-]+@[a-zA-Z0-9]+\b", text)
|
| 128 |
+
|
| 129 |
clean_upi = []
|
| 130 |
+
for u in upi_matches:
|
| 131 |
+
|
| 132 |
+
# Reject if it matches part of a real email
|
| 133 |
+
if any(
|
| 134 |
+
u == email.split("@")[0] + "@" + email.split("@")[1].split(".")[0]
|
| 135 |
+
for email in extracted["emailAddresses"]
|
| 136 |
+
):
|
| 137 |
+
continue
|
| 138 |
+
|
| 139 |
+
# Reject very short domains
|
| 140 |
+
domain = u.split("@")[1]
|
| 141 |
+
if len(domain) < 3:
|
| 142 |
+
continue
|
| 143 |
+
|
| 144 |
+
clean_upi.append(u)
|
| 145 |
+
|
| 146 |
extracted["upiIds"] = list(set(clean_upi))
|
| 147 |
|
| 148 |
+
# =========================
|
| 149 |
# Phishing links
|
| 150 |
+
# =========================
|
| 151 |
links = re.findall(r"https?://[^\s]+", text)
|
| 152 |
clean_links = [l.rstrip(".,)") for l in links]
|
| 153 |
extracted["phishingLinks"] = list(set(clean_links))
|
| 154 |
|
| 155 |
+
# =========================
|
| 156 |
# Case IDs (REF, CASE, ID)
|
| 157 |
+
# =========================
|
| 158 |
case_ids = re.findall(
|
| 159 |
r"\b(?:REF|CASE|ID)[- ]?\d+(?:-\d+)*\b",
|
| 160 |
text,
|
|
|
|
| 170 |
|
| 171 |
extracted["caseIds"] = list(set(case_ids + emp_ids))
|
| 172 |
|
| 173 |
+
# =========================
|
| 174 |
# Policy numbers
|
| 175 |
+
# =========================
|
| 176 |
policies = re.findall(
|
| 177 |
r"\bPOL[- ]?\d+(?:-\d+)*\b",
|
| 178 |
text,
|
|
|
|
| 180 |
)
|
| 181 |
extracted["policyNumbers"] = list(set(policies))
|
| 182 |
|
| 183 |
+
# =========================
|
| 184 |
# Transaction / Order IDs
|
| 185 |
+
# =========================
|
| 186 |
txns = re.findall(
|
| 187 |
r"\b(?:TXN|ORDER|ORD)[- ]?\d+(?:-\d+)*\b",
|
| 188 |
text,
|
|
|
|
| 288 |
conv = conversation_store[session_id]
|
| 289 |
intel = intelligence_store[session_id]
|
| 290 |
|
| 291 |
+
duration_seconds = max(
|
| 292 |
+
200,
|
| 293 |
+
int(time.time() - session_meta[session_id]["start"])
|
| 294 |
+
)
|
| 295 |
|
| 296 |
payload = {
|
| 297 |
"sessionId": session_id,
|
|
|
|
| 371 |
|
| 372 |
if __name__ == "__main__":
|
| 373 |
port = int(os.getenv("PORT", "8000"))
|
| 374 |
+
app.run(host="0.0.0.0", port=port)
|