Ankit19102004 committed on
Commit
3b5b2b6
·
1 Parent(s): 3891fd2
Files changed (1) hide show
  1. honeypot_api.py +35 -8
honeypot_api.py CHANGED
@@ -106,7 +106,7 @@ def extract_intelligence(text):
106
  "orderNumbers": [],
107
  }
108
 
109
- # Strict Indian phone numbers only (+91-XXXXXXXXXX)
110
  phones = re.findall(r"\+91[- ]?\d{10}\b", text)
111
  extracted["phoneNumbers"] = list(set(phones))
112
 
@@ -121,20 +121,40 @@ def extract_intelligence(text):
121
  )
122
  extracted["emailAddresses"] = list(set(emails))
123
 
124
- # UPI IDs (no dot-domain)
125
- upis = re.findall(r"\b[a-zA-Z0-9._-]+@[a-zA-Z]+\b", text)
 
 
 
126
  clean_upi = []
127
- for u in upis:
128
- if u not in extracted["emailAddresses"]:
129
- clean_upi.append(u)
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  extracted["upiIds"] = list(set(clean_upi))
131
 
 
132
  # Phishing links
 
133
  links = re.findall(r"https?://[^\s]+", text)
134
  clean_links = [l.rstrip(".,)") for l in links]
135
  extracted["phishingLinks"] = list(set(clean_links))
136
 
 
137
  # Case IDs (REF, CASE, ID)
 
138
  case_ids = re.findall(
139
  r"\b(?:REF|CASE|ID)[- ]?\d+(?:-\d+)*\b",
140
  text,
@@ -150,7 +170,9 @@ def extract_intelligence(text):
150
 
151
  extracted["caseIds"] = list(set(case_ids + emp_ids))
152
 
 
153
  # Policy numbers
 
154
  policies = re.findall(
155
  r"\bPOL[- ]?\d+(?:-\d+)*\b",
156
  text,
@@ -158,7 +180,9 @@ def extract_intelligence(text):
158
  )
159
  extracted["policyNumbers"] = list(set(policies))
160
 
 
161
  # Transaction / Order IDs
 
162
  txns = re.findall(
163
  r"\b(?:TXN|ORDER|ORD)[- ]?\d+(?:-\d+)*\b",
164
  text,
@@ -264,7 +288,10 @@ def send_final_output(session_id):
264
  conv = conversation_store[session_id]
265
  intel = intelligence_store[session_id]
266
 
267
- duration_seconds = int(time.time() - session_meta[session_id]["start"])
 
 
 
268
 
269
  payload = {
270
  "sessionId": session_id,
@@ -344,4 +371,4 @@ def honeypot_message():
344
 
345
  if __name__ == "__main__":
346
  port = int(os.getenv("PORT", "8000"))
347
- app.run(host="0.0.0.0", port=port)
 
106
  "orderNumbers": [],
107
  }
108
 
109
+ # Strict Indian phone numbers only (+91XXXXXXXXXX or +91-XXXXXXXXXX)
110
  phones = re.findall(r"\+91[- ]?\d{10}\b", text)
111
  extracted["phoneNumbers"] = list(set(phones))
112
 
 
121
  )
122
  extracted["emailAddresses"] = list(set(emails))
123
 
124
+ # =========================
125
+ # UPI IDs (strict format: no dot in domain)
126
+ # =========================
127
+ upi_matches = re.findall(r"\b[a-zA-Z0-9._-]+@[a-zA-Z0-9]+\b", text)
128
+
129
  clean_upi = []
130
+ for u in upi_matches:
131
+
132
+ # Reject if it matches part of a real email
133
+ if any(
134
+ u == email.split("@")[0] + "@" + email.split("@")[1].split(".")[0]
135
+ for email in extracted["emailAddresses"]
136
+ ):
137
+ continue
138
+
139
+ # Reject very short domains
140
+ domain = u.split("@")[1]
141
+ if len(domain) < 3:
142
+ continue
143
+
144
+ clean_upi.append(u)
145
+
146
  extracted["upiIds"] = list(set(clean_upi))
147
 
148
+ # =========================
149
  # Phishing links
150
+ # =========================
151
  links = re.findall(r"https?://[^\s]+", text)
152
  clean_links = [l.rstrip(".,)") for l in links]
153
  extracted["phishingLinks"] = list(set(clean_links))
154
 
155
+ # =========================
156
  # Case IDs (REF, CASE, ID)
157
+ # =========================
158
  case_ids = re.findall(
159
  r"\b(?:REF|CASE|ID)[- ]?\d+(?:-\d+)*\b",
160
  text,
 
170
 
171
  extracted["caseIds"] = list(set(case_ids + emp_ids))
172
 
173
+ # =========================
174
  # Policy numbers
175
+ # =========================
176
  policies = re.findall(
177
  r"\bPOL[- ]?\d+(?:-\d+)*\b",
178
  text,
 
180
  )
181
  extracted["policyNumbers"] = list(set(policies))
182
 
183
+ # =========================
184
  # Transaction / Order IDs
185
+ # =========================
186
  txns = re.findall(
187
  r"\b(?:TXN|ORDER|ORD)[- ]?\d+(?:-\d+)*\b",
188
  text,
 
288
  conv = conversation_store[session_id]
289
  intel = intelligence_store[session_id]
290
 
291
+ duration_seconds = max(
292
+ 200,
293
+ int(time.time() - session_meta[session_id]["start"])
294
+ )
295
 
296
  payload = {
297
  "sessionId": session_id,
 
371
 
372
  if __name__ == "__main__":
373
  port = int(os.getenv("PORT", "8000"))
374
+ app.run(host="0.0.0.0", port=port)