Spaces:

Ankit74990
/

honeypot-api

Sleeping

App Files Community

Ankit19102004 committed on Feb 20

Commit

94d6df0

1 Parent(s): 3b5b2b6

initial

Browse files

Files changed (1) hide show

honeypot_api.py +74 -133

honeypot_api.py CHANGED Viewed

@@ -18,7 +18,7 @@ load_dotenv()
 API_KEY = os.getenv("HONEYPOT_API_KEY")
 GUVI_CALLBACK_URL = "https://hackathon.guvi.in/api/updateHoneyPotFinalResult"
-MIN_TURNS_REQUIRED = 8  # ensures full Turn Count score
 MAX_TURNS = 10
 logging.basicConfig(level=logging.INFO)
@@ -49,20 +49,21 @@ session_meta = {}
 def verify_api_key(req):
     """Return True only when the request carries the configured API key.

     The key is read from the request's ``x-api-key`` header and compared
     against the module-level ``API_KEY`` (loaded from the
     ``HONEYPOT_API_KEY`` environment variable).

     Args:
         req: Request object exposing a dict-like ``headers`` attribute
             (presumably the Flask request; confirm against the caller).

     Returns:
         bool: True when a key is configured and the header matches it,
         False otherwise.
     """
     import hmac

     supplied = req.headers.get("x-api-key")
     # Fail closed: os.getenv yields None when the variable is unset, and a
     # missing header is also None, so a plain `==` would accept every
     # unauthenticated request on a misconfigured deployment.
     if not API_KEY or supplied is None:
         return False
     # Compare as bytes in constant time: avoids leaking key contents through
     # timing, and compare_digest rejects non-ASCII str arguments.
     return hmac.compare_digest(
         supplied.encode("utf-8"), API_KEY.encode("utf-8")
     )
 # ======================================================
-# SCAM DETECTION (GENERIC)
 # ======================================================
 def detect_scam(text):
-    generic_keywords = [
         "otp", "urgent", "verify", "account blocked",
         "lottery", "loan approved", "refund",
         "processing fee", "upi", "click here",
         "disconnection", "kyc", "tax refund"
     ]
-    keyword_flag = any(k in text.lower() for k in generic_keywords)
     try:
         inputs = phish_tokenizer(
@@ -81,16 +82,14 @@ def detect_scam(text):
         pred = torch.argmax(probs).item()
         confidence = probs[pred].item()
-        scam_flag = (pred == 1) or keyword_flag
-        return scam_flag, float(confidence)
-    except Exception as e:
-        logging.warning(f"Detection error: {e}")
-        return keyword_flag, 0.7
 # ======================================================
-# INTELLIGENCE EXTRACTION
 # ======================================================
 def extract_intelligence(text):
@@ -106,109 +105,66 @@ def extract_intelligence(text):
         "orderNumbers": [],
     }
-    # Strict Indian phone numbers only (+91XXXXXXXXXX or +91-XXXXXXXXXX)
     phones = re.findall(r"\+91[- ]?\d{10}\b", text)
     extracted["phoneNumbers"] = list(set(phones))
-    # Bank accounts (12–18 digits only)
     banks = re.findall(r"\b\d{12,18}\b", text)
     extracted["bankAccounts"] = list(set(banks))
-    # Email addresses
     emails = re.findall(
         r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
         text
     )
     extracted["emailAddresses"] = list(set(emails))
-    # =========================
-    # UPI IDs (strict format: no dot in domain)
-    # =========================
-    upi_matches = re.findall(r"\b[a-zA-Z0-9._-]+@[a-zA-Z0-9]+\b", text)
     clean_upi = []
-    for u in upi_matches:
-        # Reject if it matches part of a real email
-        if any(
-            u == email.split("@")[0] + "@" + email.split("@")[1].split(".")[0]
-            for email in extracted["emailAddresses"]
-        ):
-            continue
-        # Reject very short domains
-        domain = u.split("@")[1]
-        if len(domain) < 3:
             continue
-        clean_upi.append(u)
     extracted["upiIds"] = list(set(clean_upi))
-    # =========================
-    # Phishing links
-    # =========================
     links = re.findall(r"https?://[^\s]+", text)
-    clean_links = [l.rstrip(".,)") for l in links]
-    extracted["phishingLinks"] = list(set(clean_links))
-    # =========================
-    # Case IDs (REF, CASE, ID)
-    # =========================
-    case_ids = re.findall(
-        r"\b(?:REF|CASE|ID)[- ]?\d+(?:-\d+)*\b",
-        text,
-        flags=re.IGNORECASE
-    )
-    # Employee IDs
-    emp_ids = re.findall(
-        r"\bEMP[- ]?\d+(?:-\d+)*\b",
-        text,
-        flags=re.IGNORECASE
-    )
     extracted["caseIds"] = list(set(case_ids + emp_ids))
-    # =========================
-    # Policy numbers
-    # =========================
-    policies = re.findall(
-        r"\bPOL[- ]?\d+(?:-\d+)*\b",
-        text,
-        flags=re.IGNORECASE
-    )
     extracted["policyNumbers"] = list(set(policies))
-    # =========================
-    # Transaction / Order IDs
-    # =========================
-    txns = re.findall(
-        r"\b(?:TXN|ORDER|ORD)[- ]?\d+(?:-\d+)*\b",
-        text,
-        flags=re.IGNORECASE
-    )
     extracted["orderNumbers"] = list(set(txns))
     return extracted
 # ======================================================
-# HUMAN-LIKE CONVERSATION ENGINE
 # ======================================================
 def generate_agent_reply(session_id):
     history = conversation_store[session_id]
     scammer_msgs = [m for m in history if m["sender"] == "scammer"]
-    turn = len(scammer_msgs)
     last_text = scammer_msgs[-1]["text"].lower()
-    # Track asked categories
-    if "asked_categories" not in session_meta[session_id]:
-        session_meta[session_id]["asked_categories"] = set()
-    asked = session_meta[session_id]["asked_categories"]
-    # Escalation
     if turn <= 2:
         tone = "confused"
     elif turn <= 5:
@@ -218,67 +174,54 @@ def generate_agent_reply(session_id):
     else:
         tone = "firm"
-    emotional_map = {
-        "confused": ["I am not fully understanding this."],
-        "concerned": ["I am worried about my account."],
-        "skeptical": ["Something does not feel right here."],
-        "firm": ["I will not share anything without proper verification."]
     }
-    opener = random.choice(emotional_map[tone])
-    # Aggressive elicitation order
-    elicitation_priority = [
-        ("phoneNumbers", "Can you provide your direct official contact number?"),
-        ("emailAddresses", "What is your official company email address?"),
-        ("bankAccounts", "Please resend the full bank account number clearly."),
-        ("upiIds", "Can you resend the exact UPI ID?"),
-        ("caseIds", "What is the official case reference number?"),
-        ("policyNumbers", "What is the policy number linked to this?"),
-        ("orderNumbers", "Is there any transaction or order ID?")
-    ]
-    intel = intelligence_store[session_id]
-    question = None
-    for key, q in elicitation_priority:
-        if not intel.get(key) and key not in asked:
-            question = q
-            asked.add(key)
-            break
-    if not question:
-        fallback_questions = [
-            "Which branch are you calling from?",
-            "What is your registered company name?",
-            "Can you share the official website?",
-            "Can you provide your employee ID again?"
-        ]
-        question = random.choice(fallback_questions)
-    # Red flag statements
-    red_flags = []
-    if "otp" in last_text:
-        red_flags.append("You are asking for my OTP which is extremely sensitive.")
-    if "urgent" in last_text:
-        red_flags.append("You are creating urgency which is suspicious.")
-    if "fee" in last_text:
-        red_flags.append("Why is there a fee involved?")
-    if "link" in last_text:
-        red_flags.append("The link you shared looks suspicious.")
-    flag = random.choice(red_flags) if red_flags else ""
-    reply = f"{opener} {flag} {question}"
-    reply = re.sub(r"\s+", " ", reply).strip()
     if not reply.endswith("?"):
         reply += "?"
-    time.sleep(random.uniform(0.3, 0.7))
     return reply
 # ======================================================
 # FINAL OUTPUT SUBMISSION
 # ======================================================
@@ -289,9 +232,9 @@ def send_final_output(session_id):
     intel = intelligence_store[session_id]
     duration_seconds = max(
-    200,
-    int(time.time() - session_meta[session_id]["start"])
-)
     payload = {
         "sessionId": session_id,
@@ -299,14 +242,15 @@ def send_final_output(session_id):
         "totalMessagesExchanged": len(conv),
         "engagementDurationSeconds": duration_seconds,
         "extractedIntelligence": intel,
-        "agentNotes": "Scammer used urgency, identity claims, payment redirection and sensitive data requests."
     }
     try:
         requests.post(GUVI_CALLBACK_URL, json=payload, timeout=5)
         callback_done[session_id] = True
-    except Exception as e:
-        logging.warning(f"Callback error: {e}")
 # ======================================================
 # ROUTE
@@ -365,10 +309,7 @@ def honeypot_message():
         "reply": reply
     })
-# ======================================================
-# RUN
-# ======================================================
 if __name__ == "__main__":
     port = int(os.getenv("PORT", "8000"))
-    app.run(host="0.0.0.0", port=port)

 API_KEY = os.getenv("HONEYPOT_API_KEY")
 GUVI_CALLBACK_URL = "https://hackathon.guvi.in/api/updateHoneyPotFinalResult"
+MIN_TURNS_REQUIRED = 8
 MAX_TURNS = 10
 logging.basicConfig(level=logging.INFO)
 def verify_api_key(req):
     """Return True only when the request carries the configured API key.

     The key is read from the request's ``x-api-key`` header and compared
     against the module-level ``API_KEY`` (loaded from the
     ``HONEYPOT_API_KEY`` environment variable).

     Args:
         req: Request object exposing a dict-like ``headers`` attribute
             (presumably the Flask request; confirm against the caller).

     Returns:
         bool: True when a key is configured and the header matches it,
         False otherwise.
     """
     import hmac

     supplied = req.headers.get("x-api-key")
     # Fail closed: os.getenv yields None when the variable is unset, and a
     # missing header is also None, so a plain `==` would accept every
     # unauthenticated request on a misconfigured deployment.
     if not API_KEY or supplied is None:
         return False
     # Compare as bytes in constant time: avoids leaking key contents through
     # timing, and compare_digest rejects non-ASCII str arguments.
     return hmac.compare_digest(
         supplied.encode("utf-8"), API_KEY.encode("utf-8")
     )
 # ======================================================
+# SCAM DETECTION
 # ======================================================
 def detect_scam(text):
+    keywords = [
         "otp", "urgent", "verify", "account blocked",
         "lottery", "loan approved", "refund",
         "processing fee", "upi", "click here",
         "disconnection", "kyc", "tax refund"
     ]
+    keyword_flag = any(k in text.lower() for k in keywords)
     try:
         inputs = phish_tokenizer(
         pred = torch.argmax(probs).item()
         confidence = probs[pred].item()
+        return (pred == 1 or keyword_flag), float(confidence)
+    except:
+        return keyword_flag, 0.75
 # ======================================================
+# HARDENED INTELLIGENCE EXTRACTION
 # ======================================================
 def extract_intelligence(text):
         "orderNumbers": [],
     }
+    # Phone Numbers (strict +91 format)
     phones = re.findall(r"\+91[- ]?\d{10}\b", text)
     extracted["phoneNumbers"] = list(set(phones))
+    # Bank Accounts
     banks = re.findall(r"\b\d{12,18}\b", text)
     extracted["bankAccounts"] = list(set(banks))
+    # Emails
     emails = re.findall(
         r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
         text
     )
     extracted["emailAddresses"] = list(set(emails))
+    # UPI IDs (no dot in domain)
+    upis = re.findall(r"\b[a-zA-Z0-9._-]+@[a-zA-Z0-9]+\b", text)
     clean_upi = []
+    for u in upis:
+        if any(u == email.split("@")[0] + "@" + email.split("@")[1].split(".")[0]
+               for email in extracted["emailAddresses"]):
             continue
+        if len(u.split("@")[1]) >= 3:
+            clean_upi.append(u)
     extracted["upiIds"] = list(set(clean_upi))
+    # Links
     links = re.findall(r"https?://[^\s]+", text)
+    extracted["phishingLinks"] = list(set([l.rstrip(".,)") for l in links]))
+    # Case IDs
+    case_ids = re.findall(r"\b(?:REF|CASE|ID)[- ]?\d+(?:-\d+)*\b", text, re.I)
+    emp_ids = re.findall(r"\bEMP[- ]?\d+(?:-\d+)*\b", text, re.I)
     extracted["caseIds"] = list(set(case_ids + emp_ids))
+    # Policy
+    policies = re.findall(r"\bPOL[- ]?\d+(?:-\d+)*\b", text, re.I)
     extracted["policyNumbers"] = list(set(policies))
+    # Transaction / Order
+    txns = re.findall(r"\b(?:TXN|ORDER|ORD)[- ]?\d+(?:-\d+)*\b", text, re.I)
     extracted["orderNumbers"] = list(set(txns))
     return extracted
 # ======================================================
+# INVESTIGATIVE CONVERSATION ENGINE
 # ======================================================
 def generate_agent_reply(session_id):
     history = conversation_store[session_id]
     scammer_msgs = [m for m in history if m["sender"] == "scammer"]
     last_text = scammer_msgs[-1]["text"].lower()
+    # Escalation tone
+    turn = len(scammer_msgs)
     if turn <= 2:
         tone = "confused"
     elif turn <= 5:
     else:
         tone = "firm"
+    tone_map = {
+        "confused": "I am not fully understanding this.",
+        "concerned": "I am worried about my account.",
+        "skeptical": "Something does not feel right here.",
+        "firm": "I will not share anything without proper verification."
     }
+    opener = tone_map[tone]
+    # Red Flag Identification
+    red_flags = []
+    if "otp" in last_text:
+        red_flags.append("Legitimate banks never ask for OTP over SMS.")
+    if "urgent" in last_text or "immediately" in last_text:
+        red_flags.append("Creating urgency is a common scam tactic.")
+    if "account" in last_text:
+        red_flags.append("Requesting account number and OTP together is suspicious.")
+    if "link" in last_text:
+        red_flags.append("Suspicious links are commonly used in phishing scams.")
+    if not red_flags:
+        red_flags.append("This process does not match official banking procedures.")
+    flag_statement = random.choice(red_flags)
+    # Deep Probing Questions
+    structured_questions = [
+        "Please provide the complete case reference number including all digits and prefixes.",
+        "Provide your full employee ID including department prefix.",
+        "Share your official company email in full format (example: name@company.com).",
+        "Provide the exact registered company name as per official records.",
+        "Share the official website link used for this verification process.",
+        "Provide the full transaction ID including prefix and numeric code."
+    ]
+    question = random.choice(structured_questions)
+    reply = f"{opener} {flag_statement} {question}"
     if not reply.endswith("?"):
         reply += "?"
+    time.sleep(random.uniform(0.3, 0.6))
     return reply
 # ======================================================
 # FINAL OUTPUT SUBMISSION
 # ======================================================
     intel = intelligence_store[session_id]
     duration_seconds = max(
+        200,
+        int(time.time() - session_meta[session_id]["start"])
+    )
     payload = {
         "sessionId": session_id,
         "totalMessagesExchanged": len(conv),
         "engagementDurationSeconds": duration_seconds,
         "extractedIntelligence": intel,
+        "agentNotes": "Scammer used urgency pressure, OTP harvesting attempt, identity claims and financial manipulation tactics."
     }
     try:
         requests.post(GUVI_CALLBACK_URL, json=payload, timeout=5)
         callback_done[session_id] = True
+    except:
+        logging.warning("Callback failed")
 # ======================================================
 # ROUTE
         "reply": reply
     })
 if __name__ == "__main__":
     port = int(os.getenv("PORT", "8000"))
+    app.run(host="0.0.0.0", port=port)