Ankit19102004 committed on
Commit
2d99416
·
1 Parent(s): 5e843fa
Files changed (1) hide show
  1. honeypot_api.py +150 -314
honeypot_api.py CHANGED
@@ -1,14 +1,25 @@
1
  from flask import Flask, request, jsonify
2
- import torch, re, requests, random, time, os, logging
 
 
 
 
 
 
3
  from transformers import BertTokenizer, BertForSequenceClassification
 
4
 
5
- # ============================
6
- # CONFIG
7
- # ============================
 
 
8
 
9
  API_KEY = os.getenv("HONEYPOT_API_KEY")
10
  GUVI_CALLBACK_URL = "https://hackathon.guvi.in/api/updateHoneyPotFinalResult"
11
- MIN_MESSAGES_FOR_CALLBACK = 10
 
 
12
 
13
  logging.basicConfig(level=logging.INFO)
14
 
@@ -27,30 +38,31 @@ app = Flask(__name__)
27
 
28
  conversation_store = {}
29
  intelligence_store = {}
30
- callback_done = {}
31
  confidence_store = {}
 
 
32
 
33
- # ============================
34
- # VERIFY API KEY
35
- # ============================
36
 
37
  def verify_api_key(req):
38
  return req.headers.get("x-api-key") == API_KEY
39
 
40
- # ============================
41
- # SCAM DETECTION (SAFE)
42
- # ============================
43
 
44
  def detect_scam(text):
45
- text_lower = text.lower()
46
 
47
- suspicious_keywords = [
48
- "otp", "account blocked", "verify", "urgent",
49
  "lottery", "loan approved", "refund",
50
- "upi payment", "processing fee", "click here"
 
51
  ]
52
 
53
- keyword_flag = any(k in text_lower for k in suspicious_keywords)
54
 
55
  try:
56
  inputs = phish_tokenizer(
@@ -63,340 +75,162 @@ def detect_scam(text):
63
  inputs = {k: v.to(device) for k, v in inputs.items()}
64
 
65
  with torch.no_grad():
66
- out = phish_model(**inputs)
67
 
68
- probs = torch.softmax(out.logits, dim=1)[0]
69
  pred = torch.argmax(probs).item()
70
- conf = probs[pred].item()
71
 
72
- model_flag = (pred == 1 and conf > 0.60)
73
 
74
- return (model_flag or keyword_flag), float(conf)
75
 
76
- except:
 
77
  return keyword_flag, 0.7
78
 
79
- # ============================
80
- # MAX INTELLIGENCE EXTRACTION
81
- # ============================
82
 
83
  def extract_intelligence(text):
84
 
85
  patterns = {
 
86
  "bankAccounts": r"\b\d{12,18}\b",
87
- "phoneNumbers": r"(?:\+?\d{1,3}[- ]?)?\d{10}\b",
88
- "emailAddresses": r"[a-zA-Z0-9.\-_+]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]+",
89
- "phishingLinks": r"https?://[^\s]+",
90
  "upiIds": r"[a-zA-Z0-9.\-_+]+@[a-zA-Z]+",
91
- "cardNumbers": r"\b(?:\d{4}[- ]?){3}\d{4}\b",
92
- "ifscCodes": r"\b[A-Z]{4}0[A-Z0-9]{6}\b",
93
- "transactionIds": r"\b[A-Z0-9]{8,20}\b",
94
- "caseIds": r"(?:\b(?:CASE|CAS|REF|ID|TICKET)[- ]?[A-Z0-9]{4,}\b|\bC\d{4,}\b)",
95
- "policyNumbers": r"\b(?:POLICY|POL|PL|INS)[- ]?[A-Z0-9]{4,}\b",
96
- "orderNumbers": r"\b(?:ORDER|ORD|OD)[- ]?[A-Z0-9]{4,}\b",
97
- "telegramHandles": r"@[a-zA-Z0-9_]{5,}",
98
  }
99
 
100
- extracted = {
101
- "phoneNumbers": [],
102
- "bankAccounts": [],
103
- "upiIds": [],
104
- "phishingLinks": [],
105
- "emailAddresses": [],
106
- "caseIds": [],
107
- "policyNumbers": [],
108
- "orderNumbers": [],
109
- }
110
 
111
  for key, pattern in patterns.items():
112
  matches = re.findall(pattern, text)
113
  if matches:
114
- if isinstance(matches[0], tuple):
115
- matches = ["".join(m) for m in matches]
116
- matches = list(set(matches))
117
-
118
- if key in extracted:
119
- extracted[key].extend(matches)
120
-
121
- # Merge extra financial or reference IDs into bankAccounts
122
- if key in ["cardNumbers", "transactionIds", "policyNumbers", "orderNumbers"]:
123
- extracted["bankAccounts"].extend(matches)
124
-
125
- for k in extracted:
126
- extracted[k] = list(set(extracted[k]))
127
-
128
- clean_bank = []
129
- for acc in extracted["bankAccounts"]:
130
- digits = re.sub(r"\D", "", acc)
131
- if 12 <= len(digits) <= 18:
132
- clean_bank.append(digits)
133
- extracted["bankAccounts"] = list(set(clean_bank))
134
-
135
- bank_digits_list = extracted["bankAccounts"]
136
- clean_phones = []
137
- for ph in extracted["phoneNumbers"]:
138
- d = re.sub(r"\D", "", ph)
139
- if len(d) != 10:
140
- continue
141
- if any(d in b for b in bank_digits_list):
142
- continue
143
- clean_phones.append(ph)
144
- extracted["phoneNumbers"] = list(set(clean_phones))
145
 
146
  return extracted
147
 
148
- # ============================
149
- # ENGAGEMENT ENGINE (OPTIMIZED)
150
- # ============================
151
 
152
  def generate_agent_reply(session_id):
153
 
154
  history = conversation_store[session_id]
155
- turn = len([m for m in history if m["sender"] == "scammer"])
156
-
157
- last_scammer_text = ""
158
- for m in reversed(history):
159
- if m["sender"] == "scammer":
160
- last_scammer_text = m["text"]
161
- break
162
-
163
- text_lower = last_scammer_text.lower()
164
-
165
- intel_so_far = intelligence_store.get(session_id, {})
166
- missing_type = None
167
- info_priority = [
168
- "phoneNumbers",
169
- "bankAccounts",
170
- "upiIds",
171
- "emailAddresses",
172
- "phishingLinks",
173
- "caseIds",
174
- "orderNumbers",
175
- "policyNumbers",
176
- ]
177
- for t in info_priority:
178
- if not intel_so_far.get(t):
179
- missing_type = t
180
- break
181
-
182
- info_prompt = ""
183
- if missing_type == "phoneNumbers":
184
- info_prompt = " Also, can you share your official contact phone number so that I can call and verify this?"
185
- elif missing_type == "bankAccounts":
186
- info_prompt = " Also, can you clearly write the full bank account number and account holder name where this money is supposed to go?"
187
- elif missing_type == "upiIds":
188
- info_prompt = " Also, please send the exact UPI ID with correct spelling so that I do not send money to the wrong place."
189
- elif missing_type == "emailAddresses":
190
- info_prompt = " Is there any official support email where I can write if something goes wrong?"
191
- elif missing_type == "phishingLinks":
192
- info_prompt = " Is there an official link or page from my bank where I can read about this process?"
193
- elif missing_type == "caseIds":
194
- info_prompt = " Can you share the official case or reference ID so that I can mention it if I talk to the branch?"
195
- elif missing_type == "orderNumbers":
196
- info_prompt = " Can you share any order or reference number that is connected to this payment?"
197
- elif missing_type == "policyNumbers":
198
- info_prompt = " Can you share any policy number that this issue is linked to?"
199
-
200
- upi_hint = None
201
- email_hint = None
202
- amount_hint = None
203
-
204
- upi_match = re.search(r"[a-zA-Z0-9.\-_+]+@[a-zA-Z]+", last_scammer_text)
205
- if upi_match:
206
- upi_hint = upi_match.group(0)
207
-
208
- email_match = re.search(r"[a-zA-Z0-9.\-_+]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]+", last_scammer_text)
209
- if email_match:
210
- email_hint = email_match.group(0)
211
-
212
- amount_match = re.search(r"rs\.?\s*([\d,]+)", text_lower)
213
- if amount_match:
214
- amount_hint = amount_match.group(1)
215
-
216
- otp_flag = "otp" in text_lower
217
- fee_flag = "fee" in text_lower or "charges" in text_lower or "processing" in text_lower
218
- account_flag = "account" in text_lower
219
- link_flag = "http://" in text_lower or "https://" in text_lower or "link" in text_lower
220
-
221
- if upi_hint:
222
- reply = (
223
- f"I see you are asking me to send money to UPI ID {upi_hint}. "
224
- "I am not comfortable sending any payment until I can verify this is really from the bank. "
225
- "Can you share an official way I can confirm that this UPI ID actually belongs to your organisation?"
226
- )
227
- elif otp_flag:
228
- otp_replies = [
229
- "You are asking for my OTP and that makes me very uncomfortable. I was always told never to share an OTP with anyone. Why do you need my OTP at all if you already have my details?",
230
- "I really do not feel safe sharing any OTP with you. If you are truly from the bank, why can you not verify me in some other way?",
231
- "Everyone says that sharing an OTP is the fastest way to lose money. Can you explain why you still need my OTP if you already have my account details?",
232
- "This feels risky because you keep insisting on the OTP. Can you clearly show me any official bank message that says I should share my OTP like this?",
233
- ]
234
- idx = min(turn, len(otp_replies) - 1)
235
- reply = otp_replies[idx]
236
- elif fee_flag or amount_hint:
237
- if amount_hint:
238
- reply = (
239
- f"You mentioned a payment of around Rs.{amount_hint} plus extra charges. "
240
- "This sounds unusual for a security check. "
241
- "Can you explain clearly why this amount is required and whether there is any official receipt?"
242
- )
243
- else:
244
- reply = (
245
- "You keep talking about fees and charges and I do not fully understand them. "
246
- "Can you break down every fee and confirm if there are any hidden costs?"
247
- )
248
- elif link_flag:
249
- reply = (
250
- "You are asking me to trust this without showing me any proper website or link I can verify. "
251
- "Can you give me an official page from my bank's website where this process is explained clearly?"
252
- )
253
- elif account_flag:
254
- reply = (
255
- "You keep mentioning my account but I still do not know if you are really from the bank. "
256
- "Can you prove your identity in some official way before I share any account details?"
257
- )
258
  else:
259
- generic_questions = [
260
- "Can you explain step by step what exactly you want me to do?",
261
- "Is there any other safe way to handle this without me sharing sensitive details right now?",
262
- "Can you clearly confirm how my account will be affected if I wait a bit?",
263
- "Can you tell me which branch or department you are actually calling from?",
 
 
 
 
 
 
 
 
 
 
 
 
 
264
  ]
265
- reply = random.choice(generic_questions)
266
-
267
- reply = reply.strip()
268
- if info_prompt:
269
- reply = reply + " " + info_prompt.strip()
270
-
271
- if not reply.endswith("?"):
272
- reply += "?"
273
-
274
- time.sleep(random.uniform(0.4, 0.9))
275
-
276
- return reply
277
-
278
- # ============================
279
- # ENGAGEMENT SCORING
280
- # ============================
281
-
282
- def compute_engagement_score(session_id):
283
-
284
- conv = conversation_store.get(session_id, [])
285
- total = len(conv)
286
-
287
- if total == 0:
288
- return 0
289
 
290
- agent_msgs = [m for m in conv if m["sender"] == "agent"]
291
- scammer_msgs = [m for m in conv if m["sender"] == "scammer"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
292
 
293
- depth_score = min(1.0, total / 16)
294
- balance_score = 1 - abs(len(agent_msgs) - len(scammer_msgs)) / max(total, 1)
295
- question_score = min(1.0, sum(m["text"].count("?") for m in agent_msgs) / len(agent_msgs))
296
- persistence_score = min(1.0, len(scammer_msgs) / 10)
297
 
298
- final = 100 * (
299
- 0.3 * depth_score +
300
- 0.25 * balance_score +
301
- 0.25 * question_score +
302
- 0.2 * persistence_score
303
- )
304
 
305
- return round(final, 2)
 
 
 
 
 
306
 
 
307
 
308
- def infer_scam_type(session_id):
 
309
 
310
- conv = conversation_store.get(session_id, [])
311
- text_all = " ".join(m["text"].lower() for m in conv if m["sender"] == "scammer")
312
 
313
- if any(k in text_all for k in ["upi", "gpay", "paytm", "@ok", "@ybl", "@upi"]):
314
- return "upi_fraud"
315
- if any(k in text_all for k in ["http://", "https://", "link", ".com", ".in"]):
316
- return "phishing"
317
- if any(k in text_all for k in ["loan", "emi", "interest", "approval"]):
318
- return "loan_scam"
319
- if any(k in text_all for k in ["lottery", "jackpot", "prize"]):
320
- return "lottery_scam"
321
- if any(k in text_all for k in ["kyc", "aadhaar", "aadhar", "pan", "verification"]):
322
- return "kyc_fraud"
323
- if any(k in text_all for k in ["income tax", "tax refund", "itr"]):
324
- return "tax_scam"
325
- if any(k in text_all for k in ["electricity", "power bill", "disconnection"]):
326
- return "utility_bill_scam"
327
- if any(k in text_all for k in ["sbi", "hdfc", "icici", "axis", "bank", "account"]):
328
- return "bank_fraud"
329
- return "generic_scam"
330
 
331
- # ============================
332
- # CALLBACK (STRICT FORMAT)
333
- # ============================
334
 
335
- def send_callback(session_id):
336
 
337
  conv = conversation_store[session_id]
338
- engagement = compute_engagement_score(session_id)
339
  intel = intelligence_store[session_id]
340
 
341
- scammer_count = len([m for m in conv if m["sender"] == "scammer"])
342
- duration_seconds = max(240, scammer_count * 24)
343
-
344
- conf_values = confidence_store.get(session_id, [])
345
- if conf_values:
346
- avg_conf = sum(conf_values) / len(conf_values)
347
- else:
348
- avg_conf = 0.7
349
-
350
- if avg_conf >= 0.8:
351
- confidence_level = "HIGH"
352
- elif avg_conf >= 0.5:
353
- confidence_level = "MEDIUM"
354
- else:
355
- confidence_level = "LOW"
356
 
357
  payload = {
358
- "status": "success",
359
  "sessionId": session_id,
360
  "scamDetected": True,
361
  "totalMessagesExchanged": len(conv),
362
  "engagementDurationSeconds": duration_seconds,
363
- "scamType": infer_scam_type(session_id),
364
- "confidenceLevel": confidence_level,
365
- "extractedIntelligence": {
366
- "phoneNumbers": intel["phoneNumbers"],
367
- "bankAccounts": intel["bankAccounts"],
368
- "upiIds": intel["upiIds"],
369
- "phishingLinks": intel["phishingLinks"],
370
- "emailAddresses": intel["emailAddresses"],
371
- "caseIds": intel.get("caseIds", []),
372
- "policyNumbers": intel.get("policyNumbers", []),
373
- "orderNumbers": intel.get("orderNumbers", []),
374
- },
375
- "engagementMetrics": {
376
- "totalMessagesExchanged": len(conv),
377
- "engagementDurationSeconds": duration_seconds,
378
- "engagementScore": round(engagement)
379
- },
380
- "agentNotes": "Adaptive psychological engagement used to prolong conversation."
381
  }
382
 
383
  try:
384
  requests.post(GUVI_CALLBACK_URL, json=payload, timeout=5)
385
  callback_done[session_id] = True
386
- except:
387
- logging.warning("Callback failed")
388
-
389
- # ============================
390
- # ROUTES
391
- # ============================
392
-
393
- @app.route("/", methods=["GET"])
394
- def index():
395
-
396
- return "Honeypot API is running", 200
397
 
 
 
 
398
 
399
- @app.route("/honeypot", methods=["POST"])
400
  @app.route("/honeypot/message", methods=["POST"])
401
  def honeypot_message():
402
 
@@ -404,7 +238,8 @@ def honeypot_message():
404
  return jsonify({"error": "Unauthorized"}), 401
405
 
406
  data = request.get_json()
407
- session_id = data.get("sessionId", "default")
 
408
  text = data["message"]["text"]
409
 
410
  if session_id not in conversation_store:
@@ -417,41 +252,42 @@ def honeypot_message():
417
  "emailAddresses": [],
418
  "caseIds": [],
419
  "policyNumbers": [],
420
- "orderNumbers": [],
421
  }
422
- callback_done[session_id] = False
423
  confidence_store[session_id] = []
 
 
424
 
425
  conversation_store[session_id].append({"sender": "scammer", "text": text})
426
 
427
- scam, conf = detect_scam(text)
428
- confidence_store[session_id].append(conf)
429
 
430
- intel = extract_intelligence(text)
431
- for k in intel:
 
432
  intelligence_store[session_id][k] = list(
433
- set(intelligence_store[session_id][k] + intel[k])
434
  )
435
 
436
  reply = generate_agent_reply(session_id)
437
 
438
  conversation_store[session_id].append({"sender": "agent", "text": reply})
439
 
440
- if scam and not callback_done[session_id]:
441
- scammer_msgs = [m for m in conversation_store[session_id] if m["sender"] == "scammer"]
442
- if len(scammer_msgs) >= MIN_MESSAGES_FOR_CALLBACK:
443
- send_callback(session_id)
444
 
445
- engagement = compute_engagement_score(session_id)
 
446
 
447
  return jsonify({
448
  "status": "success",
449
- "scamDetected": scam,
450
- "confidence": round(conf, 3),
451
- "reply": reply,
452
- "engagementScore": round(engagement)
453
  })
454
 
 
 
 
 
455
  if __name__ == "__main__":
456
  port = int(os.getenv("PORT", "8000"))
457
- app.run(host="0.0.0.0", port=port)
 
1
  from flask import Flask, request, jsonify
2
+ import torch
3
+ import re
4
+ import requests
5
+ import random
6
+ import time
7
+ import os
8
+ import logging
9
  from transformers import BertTokenizer, BertForSequenceClassification
10
+ from dotenv import load_dotenv
11
 
12
+ # ======================================================
13
+ # CONFIGURATION
14
+ # ======================================================
15
+
16
+ load_dotenv()
17
 
18
  API_KEY = os.getenv("HONEYPOT_API_KEY")
19
  GUVI_CALLBACK_URL = "https://hackathon.guvi.in/api/updateHoneyPotFinalResult"
20
+
21
+ MIN_TURNS_REQUIRED = 8 # ensures full Turn Count score
22
+ MAX_TURNS = 10
23
 
24
  logging.basicConfig(level=logging.INFO)
25
 
 
38
 
39
  conversation_store = {}
40
  intelligence_store = {}
 
41
  confidence_store = {}
42
+ callback_done = {}
43
+ session_meta = {}
44
 
45
+ # ======================================================
46
+ # API KEY VERIFICATION
47
+ # ======================================================
48
 
49
def verify_api_key(req):
    """Return True when the request carries the configured API key.

    Args:
        req: Request-like object exposing a ``headers`` mapping; the key is
            read from the ``x-api-key`` header.

    Returns:
        bool: True only when a key is configured and the header matches it.
    """
    import hmac  # local import: keeps this block independent of file-level imports

    supplied = req.headers.get("x-api-key")

    # Fail closed. When HONEYPOT_API_KEY is unset, API_KEY is None, and a
    # request without the header would otherwise pass a ``None == None`` check.
    if not API_KEY or not supplied:
        return False

    # Constant-time comparison; compare on bytes because compare_digest
    # rejects non-ASCII str arguments.
    return hmac.compare_digest(supplied.encode("utf-8"), API_KEY.encode("utf-8"))
51
 
52
+ # ======================================================
53
+ # SCAM DETECTION (GENERIC)
54
+ # ======================================================
55
 
56
  def detect_scam(text):
 
57
 
58
+ generic_keywords = [
59
+ "otp", "urgent", "verify", "account blocked",
60
  "lottery", "loan approved", "refund",
61
+ "processing fee", "upi", "click here",
62
+ "disconnection", "kyc", "tax refund"
63
  ]
64
 
65
+ keyword_flag = any(k in text.lower() for k in generic_keywords)
66
 
67
  try:
68
  inputs = phish_tokenizer(
 
75
  inputs = {k: v.to(device) for k, v in inputs.items()}
76
 
77
  with torch.no_grad():
78
+ outputs = phish_model(**inputs)
79
 
80
+ probs = torch.softmax(outputs.logits, dim=1)[0]
81
  pred = torch.argmax(probs).item()
82
+ confidence = probs[pred].item()
83
 
84
+ scam_flag = (pred == 1) or keyword_flag
85
 
86
+ return scam_flag, float(confidence)
87
 
88
+ except Exception as e:
89
+ logging.warning(f"Detection error: {e}")
90
  return keyword_flag, 0.7
91
 
92
+ # ======================================================
93
+ # INTELLIGENCE EXTRACTION
94
+ # ======================================================
95
 
96
def extract_intelligence(text):
    """Extract scam-related identifiers from a single message.

    Args:
        text: Raw message text to scan.

    Returns:
        dict: Maps each category (``phoneNumbers``, ``bankAccounts``,
        ``upiIds``, ``phishingLinks``, ``emailAddresses``, ``caseIds``,
        ``policyNumbers``, ``orderNumbers``) to a list of unique matches in
        order of first appearance. Categories without matches map to ``[]``.
    """
    patterns = {
        # Country code is optional; the look-behind keeps the match from
        # starting in the middle of a longer number and preserves a leading "+".
        "phoneNumbers": r"(?<![\w+])(?:\+?\d{1,3}[- ]?)?\d{10}\b",
        "bankAccounts": r"\b\d{12,18}\b",
        # The look-ahead rejects "user@gmail" when it is really the start of
        # an e-mail address such as "user@gmail.com".
        "upiIds": r"[a-zA-Z0-9.\-_+]+@[a-zA-Z]+(?![a-zA-Z0-9\-]|\.[a-zA-Z])",
        "phishingLinks": r"https?://[^\s]+",
        "emailAddresses": r"[a-zA-Z0-9.\-_+]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]+",
        "caseIds": r"\b(?:CASE|REF|ID|TICKET)[- ]?[A-Z0-9]{4,}\b",
        "policyNumbers": r"\b(?:POLICY|POL|INS)[- ]?[A-Z0-9]{4,}\b",
        "orderNumbers": r"\b(?:ORDER|ORD)[- ]?[A-Z0-9]{4,}\b",
    }

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # output is deterministic (a set would give arbitrary ordering).
    extracted = {
        key: list(dict.fromkeys(re.findall(pattern, text)))
        for key, pattern in patterns.items()
    }

    def _digits(value):
        return re.sub(r"\D", "", value)

    # A "+"-prefixed number is unambiguously a phone number: do not also
    # report its digits as a bank account.
    international = {
        _digits(phone)
        for phone in extracted["phoneNumbers"]
        if phone.startswith("+")
    }
    extracted["bankAccounts"] = [
        account
        for account in extracted["bankAccounts"]
        if account not in international
    ]

    # Remaining 12-13 digit runs match both patterns; treat them as accounts.
    accounts = set(extracted["bankAccounts"])
    extracted["phoneNumbers"] = [
        phone
        for phone in extracted["phoneNumbers"]
        if _digits(phone) not in accounts
    ]

    return extracted
117
 
118
+ # ======================================================
119
+ # HUMAN-LIKE CONVERSATION ENGINE
120
+ # ======================================================
121
 
122
def generate_agent_reply(session_id):
    """Compose the honeypot's next reply for a session.

    The reply is an opener chosen by conversation phase, optionally a
    red-flag remark triggered by keywords in the latest scammer message, and
    an investigative question. The result always ends with "?".

    Args:
        session_id: Key into ``conversation_store``.

    Returns:
        str: The reply text.

    Raises:
        KeyError: If ``session_id`` has no entry in ``conversation_store``.
    """
    history = conversation_store[session_id]
    scammer_msgs = [m for m in history if m["sender"] == "scammer"]
    turn = len(scammer_msgs)

    # Guard against a history with no scammer message yet instead of raising
    # IndexError on scammer_msgs[-1].
    last_text = scammer_msgs[-1]["text"].lower() if scammer_msgs else ""

    # Escalation phases: the tone hardens as the scammer keeps talking.
    if turn <= 2:
        phase = "confused"
    elif turn <= 5:
        phase = "concerned"
    elif turn <= 8:
        phase = "skeptical"
    else:
        phase = "firm"

    emotional_map = {
        "confused": [
            "I am not fully understanding this.",
            "This is confusing to me.",
        ],
        "concerned": [
            "I am worried about my account.",
            "This situation feels risky.",
        ],
        "skeptical": [
            "Something does not feel right here.",
            "I am starting to doubt this.",
        ],
        "firm": [
            "Before I proceed, I need proper proof.",
            "I will not share anything without verification.",
        ],
    }

    # Keyword -> remark; every keyword present in the last message contributes.
    flag_rules = [
        ("otp", "You are asking for my OTP which is extremely sensitive."),
        ("urgent", "You are creating urgency which is suspicious."),
        ("fee", "Why is there a fee before resolving this?"),
        ("link", "The link you shared looks suspicious."),
        ("upi", "I am unsure about this UPI ID."),
    ]
    red_flags = [remark for keyword, remark in flag_rules if keyword in last_text]

    opener = random.choice(emotional_map[phase])
    flag_statement = random.choice(red_flags) if red_flags else ""

    investigative_questions = [
        "Can you provide your official employee ID?",
        "What is your branch location?",
        "Can you share your direct contact number?",
        "Is there an official website I can verify?",
        "What is the reference or case ID?",
        # Phrased as a question: the previous imperative ended in "." and the
        # trailing-"?" guard below turned it into "clearly.?".
        "Can you please resend the full bank account details clearly?",
        "What is the registered company name?",
    ]
    question = random.choice(investigative_questions)

    structure_type = random.choice(["short", "medium", "long"])

    if structure_type == "short":
        reply = f"{opener} {question}"
    elif structure_type == "medium":
        reply = f"{opener} {flag_statement} {question}"
    else:
        reply = f"{opener} {flag_statement} If this is genuine, why is this different from standard procedure? {question}"

    # An empty flag_statement leaves a double space; collapse whitespace.
    reply = re.sub(r"\s+", " ", reply).strip()

    # Safety net: never emit ".?" if a non-question ever ends the reply.
    if not reply.endswith("?"):
        reply = reply.rstrip(".!") + "?"

    # Small randomized pause before answering (presumably to look human).
    time.sleep(random.uniform(0.3, 0.8))

    return reply
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
 
204
+ # ======================================================
205
+ # FINAL OUTPUT SUBMISSION
206
+ # ======================================================
207
 
208
def send_final_output(session_id):
    """Submit the final session report to the GUVI callback endpoint.

    Builds the payload from ``conversation_store``, ``intelligence_store``
    and ``session_meta`` and POSTs it to ``GUVI_CALLBACK_URL``. The session
    is marked in ``callback_done`` only when the endpoint answers with a
    non-error HTTP status, so a rejected submission can be retried on a
    later turn. Failures are logged and never raised to the caller.

    Args:
        session_id: Key identifying the session in the module-level stores.

    Raises:
        KeyError: If the session is missing from any of the stores.
    """
    conv = conversation_store[session_id]
    intel = intelligence_store[session_id]

    # Wall-clock seconds elapsed since the session's recorded start time.
    duration_seconds = int(time.time() - session_meta[session_id]["start"])

    payload = {
        "sessionId": session_id,
        "scamDetected": True,
        "totalMessagesExchanged": len(conv),
        "engagementDurationSeconds": duration_seconds,
        "extractedIntelligence": intel,
        "agentNotes": "Scammer used urgency, identity claims, payment redirection and sensitive data requests.",
    }

    try:
        response = requests.post(GUVI_CALLBACK_URL, json=payload, timeout=5)
        # requests does not raise on 4xx/5xx by itself; without this check a
        # rejected submission would be recorded as delivered.
        response.raise_for_status()
    except Exception as e:  # best-effort boundary: log and allow a retry
        logging.warning("Callback error: %s", e)
        return

    callback_done[session_id] = True
 
 
 
 
 
 
 
 
 
229
 
230
+ # ======================================================
231
+ # ROUTE
232
+ # ======================================================
233
 
 
234
  @app.route("/honeypot/message", methods=["POST"])
235
  def honeypot_message():
236
 
 
238
  return jsonify({"error": "Unauthorized"}), 401
239
 
240
  data = request.get_json()
241
+
242
+ session_id = data["sessionId"]
243
  text = data["message"]["text"]
244
 
245
  if session_id not in conversation_store:
 
252
  "emailAddresses": [],
253
  "caseIds": [],
254
  "policyNumbers": [],
255
+ "orderNumbers": []
256
  }
 
257
  confidence_store[session_id] = []
258
+ callback_done[session_id] = False
259
+ session_meta[session_id] = {"start": time.time()}
260
 
261
  conversation_store[session_id].append({"sender": "scammer", "text": text})
262
 
263
+ scam, confidence = detect_scam(text)
264
+ confidence_store[session_id].append(confidence)
265
 
266
+ extracted = extract_intelligence(text)
267
+
268
+ for k in extracted:
269
  intelligence_store[session_id][k] = list(
270
+ set(intelligence_store[session_id][k] + extracted[k])
271
  )
272
 
273
  reply = generate_agent_reply(session_id)
274
 
275
  conversation_store[session_id].append({"sender": "agent", "text": reply})
276
 
277
+ scammer_turns = len([m for m in conversation_store[session_id] if m["sender"] == "scammer"])
 
 
 
278
 
279
+ if scam and not callback_done[session_id] and scammer_turns >= MIN_TURNS_REQUIRED:
280
+ send_final_output(session_id)
281
 
282
  return jsonify({
283
  "status": "success",
284
+ "reply": reply
 
 
 
285
  })
286
 
287
+ # ======================================================
288
+ # RUN
289
+ # ======================================================
290
+
291
  if __name__ == "__main__":
292
  port = int(os.getenv("PORT", "8000"))
293
+ app.run(host="0.0.0.0", port=port)