Spaces:

Ankit74990
/

honeypot-api

Sleeping

App Files Files Community

Ankit19102004 committed on Feb 20

Commit

2d99416

1 Parent(s): 5e843fa

updates

Browse files

Files changed (1) hide show

honeypot_api.py +150 -314

honeypot_api.py CHANGED Viewed

@@ -1,14 +1,25 @@
 from flask import Flask, request, jsonify
-import torch, re, requests, random, time, os, logging
 from transformers import BertTokenizer, BertForSequenceClassification
-# ============================
-# CONFIG
-# ============================
 API_KEY = os.getenv("HONEYPOT_API_KEY")
 GUVI_CALLBACK_URL = "https://hackathon.guvi.in/api/updateHoneyPotFinalResult"
-MIN_MESSAGES_FOR_CALLBACK = 10
 logging.basicConfig(level=logging.INFO)
@@ -27,30 +38,31 @@ app = Flask(__name__)
 conversation_store = {}
 intelligence_store = {}
-callback_done = {}
 confidence_store = {}
-# ============================
-# VERIFY API KEY
-# ============================
 def verify_api_key(req):
     return req.headers.get("x-api-key") == API_KEY
-# ============================
-# SCAM DETECTION (SAFE)
-# ============================
 def detect_scam(text):
-    text_lower = text.lower()
-    suspicious_keywords = [
-        "otp", "account blocked", "verify", "urgent",
         "lottery", "loan approved", "refund",
-        "upi payment", "processing fee", "click here"
     ]
-    keyword_flag = any(k in text_lower for k in suspicious_keywords)
     try:
         inputs = phish_tokenizer(
@@ -63,340 +75,162 @@ def detect_scam(text):
         inputs = {k: v.to(device) for k, v in inputs.items()}
         with torch.no_grad():
-            out = phish_model(**inputs)
-        probs = torch.softmax(out.logits, dim=1)[0]
         pred = torch.argmax(probs).item()
-        conf = probs[pred].item()
-        model_flag = (pred == 1 and conf > 0.60)
-        return (model_flag or keyword_flag), float(conf)
-    except:
         return keyword_flag, 0.7
-# ============================
-# MAX INTELLIGENCE EXTRACTION
-# ============================
 def extract_intelligence(text):
     patterns = {
         "bankAccounts": r"\b\d{12,18}\b",
-        "phoneNumbers": r"(?:\+?\d{1,3}[- ]?)?\d{10}\b",
-        "emailAddresses": r"[a-zA-Z0-9.\-_+]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]+",
-        "phishingLinks": r"https?://[^\s]+",
         "upiIds": r"[a-zA-Z0-9.\-_+]+@[a-zA-Z]+",
-        "cardNumbers": r"\b(?:\d{4}[- ]?){3}\d{4}\b",
-        "ifscCodes": r"\b[A-Z]{4}0[A-Z0-9]{6}\b",
-        "transactionIds": r"\b[A-Z0-9]{8,20}\b",
-        "caseIds": r"(?:\b(?:CASE|CAS|REF|ID|TICKET)[- ]?[A-Z0-9]{4,}\b|\bC\d{4,}\b)",
-        "policyNumbers": r"\b(?:POLICY|POL|PL|INS)[- ]?[A-Z0-9]{4,}\b",
-        "orderNumbers": r"\b(?:ORDER|ORD|OD)[- ]?[A-Z0-9]{4,}\b",
-        "telegramHandles": r"@[a-zA-Z0-9_]{5,}",
     }
-    extracted = {
-        "phoneNumbers": [],
-        "bankAccounts": [],
-        "upiIds": [],
-        "phishingLinks": [],
-        "emailAddresses": [],
-        "caseIds": [],
-        "policyNumbers": [],
-        "orderNumbers": [],
-    }
     for key, pattern in patterns.items():
         matches = re.findall(pattern, text)
         if matches:
-            if isinstance(matches[0], tuple):
-                matches = ["".join(m) for m in matches]
-            matches = list(set(matches))
-            if key in extracted:
-                extracted[key].extend(matches)
-            # Merge extra financial or reference IDs into bankAccounts
-            if key in ["cardNumbers", "transactionIds", "policyNumbers", "orderNumbers"]:
-                extracted["bankAccounts"].extend(matches)
-    for k in extracted:
-        extracted[k] = list(set(extracted[k]))
-    clean_bank = []
-    for acc in extracted["bankAccounts"]:
-        digits = re.sub(r"\D", "", acc)
-        if 12 <= len(digits) <= 18:
-            clean_bank.append(digits)
-    extracted["bankAccounts"] = list(set(clean_bank))
-    bank_digits_list = extracted["bankAccounts"]
-    clean_phones = []
-    for ph in extracted["phoneNumbers"]:
-        d = re.sub(r"\D", "", ph)
-        if len(d) != 10:
-            continue
-        if any(d in b for b in bank_digits_list):
-            continue
-        clean_phones.append(ph)
-    extracted["phoneNumbers"] = list(set(clean_phones))
     return extracted
-# ============================
-# ENGAGEMENT ENGINE (OPTIMIZED)
-# ============================
 def generate_agent_reply(session_id):
     history = conversation_store[session_id]
-    turn = len([m for m in history if m["sender"] == "scammer"])
-    last_scammer_text = ""
-    for m in reversed(history):
-        if m["sender"] == "scammer":
-            last_scammer_text = m["text"]
-            break
-    text_lower = last_scammer_text.lower()
-    intel_so_far = intelligence_store.get(session_id, {})
-    missing_type = None
-    info_priority = [
-        "phoneNumbers",
-        "bankAccounts",
-        "upiIds",
-        "emailAddresses",
-        "phishingLinks",
-        "caseIds",
-        "orderNumbers",
-        "policyNumbers",
-    ]
-    for t in info_priority:
-        if not intel_so_far.get(t):
-            missing_type = t
-            break
-    info_prompt = ""
-    if missing_type == "phoneNumbers":
-        info_prompt = " Also, can you share your official contact phone number so that I can call and verify this?"
-    elif missing_type == "bankAccounts":
-        info_prompt = " Also, can you clearly write the full bank account number and account holder name where this money is supposed to go?"
-    elif missing_type == "upiIds":
-        info_prompt = " Also, please send the exact UPI ID with correct spelling so that I do not send money to the wrong place."
-    elif missing_type == "emailAddresses":
-        info_prompt = " Is there any official support email where I can write if something goes wrong?"
-    elif missing_type == "phishingLinks":
-        info_prompt = " Is there an official link or page from my bank where I can read about this process?"
-    elif missing_type == "caseIds":
-        info_prompt = " Can you share the official case or reference ID so that I can mention it if I talk to the branch?"
-    elif missing_type == "orderNumbers":
-        info_prompt = " Can you share any order or reference number that is connected to this payment?"
-    elif missing_type == "policyNumbers":
-        info_prompt = " Can you share any policy number that this issue is linked to?"
-    upi_hint = None
-    email_hint = None
-    amount_hint = None
-    upi_match = re.search(r"[a-zA-Z0-9.\-_+]+@[a-zA-Z]+", last_scammer_text)
-    if upi_match:
-        upi_hint = upi_match.group(0)
-    email_match = re.search(r"[a-zA-Z0-9.\-_+]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]+", last_scammer_text)
-    if email_match:
-        email_hint = email_match.group(0)
-    amount_match = re.search(r"rs\.?\s*([\d,]+)", text_lower)
-    if amount_match:
-        amount_hint = amount_match.group(1)
-    otp_flag = "otp" in text_lower
-    fee_flag = "fee" in text_lower or "charges" in text_lower or "processing" in text_lower
-    account_flag = "account" in text_lower
-    link_flag = "http://" in text_lower or "https://" in text_lower or "link" in text_lower
-    if upi_hint:
-        reply = (
-            f"I see you are asking me to send money to UPI ID {upi_hint}. "
-            "I am not comfortable sending any payment until I can verify this is really from the bank. "
-            "Can you share an official way I can confirm that this UPI ID actually belongs to your organisation?"
-        )
-    elif otp_flag:
-        otp_replies = [
-            "You are asking for my OTP and that makes me very uncomfortable. I was always told never to share an OTP with anyone. Why do you need my OTP at all if you already have my details?",
-            "I really do not feel safe sharing any OTP with you. If you are truly from the bank, why can you not verify me in some other way?",
-            "Everyone says that sharing an OTP is the fastest way to lose money. Can you explain why you still need my OTP if you already have my account details?",
-            "This feels risky because you keep insisting on the OTP. Can you clearly show me any official bank message that says I should share my OTP like this?",
-        ]
-        idx = min(turn, len(otp_replies) - 1)
-        reply = otp_replies[idx]
-    elif fee_flag or amount_hint:
-        if amount_hint:
-            reply = (
-                f"You mentioned a payment of around Rs.{amount_hint} plus extra charges. "
-                "This sounds unusual for a security check. "
-                "Can you explain clearly why this amount is required and whether there is any official receipt?"
-            )
-        else:
-            reply = (
-                "You keep talking about fees and charges and I do not fully understand them. "
-                "Can you break down every fee and confirm if there are any hidden costs?"
-            )
-    elif link_flag:
-        reply = (
-            "You are asking me to trust this without showing me any proper website or link I can verify. "
-            "Can you give me an official page from my bank's website where this process is explained clearly?"
-        )
-    elif account_flag:
-        reply = (
-            "You keep mentioning my account but I still do not know if you are really from the bank. "
-            "Can you prove your identity in some official way before I share any account details?"
-        )
     else:
-        generic_questions = [
-            "Can you explain step by step what exactly you want me to do?",
-            "Is there any other safe way to handle this without me sharing sensitive details right now?",
-            "Can you clearly confirm how my account will be affected if I wait a bit?",
-            "Can you tell me which branch or department you are actually calling from?",
         ]
-        reply = random.choice(generic_questions)
-    reply = reply.strip()
-    if info_prompt:
-        reply = reply + " " + info_prompt.strip()
-    if not reply.endswith("?"):
-        reply += "?"
-    time.sleep(random.uniform(0.4, 0.9))
-    return reply
-# ============================
-# ENGAGEMENT SCORING
-# ============================
-def compute_engagement_score(session_id):
-    conv = conversation_store.get(session_id, [])
-    total = len(conv)
-    if total == 0:
-        return 0
-    agent_msgs = [m for m in conv if m["sender"] == "agent"]
-    scammer_msgs = [m for m in conv if m["sender"] == "scammer"]
-    depth_score = min(1.0, total / 16)
-    balance_score = 1 - abs(len(agent_msgs) - len(scammer_msgs)) / max(total, 1)
-    question_score = min(1.0, sum(m["text"].count("?") for m in agent_msgs) / len(agent_msgs))
-    persistence_score = min(1.0, len(scammer_msgs) / 10)
-    final = 100 * (
-        0.3 * depth_score +
-        0.25 * balance_score +
-        0.25 * question_score +
-        0.2 * persistence_score
-    )
-    return round(final, 2)
-def infer_scam_type(session_id):
-    conv = conversation_store.get(session_id, [])
-    text_all = " ".join(m["text"].lower() for m in conv if m["sender"] == "scammer")
-    if any(k in text_all for k in ["upi", "gpay", "paytm", "@ok", "@ybl", "@upi"]):
-        return "upi_fraud"
-    if any(k in text_all for k in ["http://", "https://", "link", ".com", ".in"]):
-        return "phishing"
-    if any(k in text_all for k in ["loan", "emi", "interest", "approval"]):
-        return "loan_scam"
-    if any(k in text_all for k in ["lottery", "jackpot", "prize"]):
-        return "lottery_scam"
-    if any(k in text_all for k in ["kyc", "aadhaar", "aadhar", "pan", "verification"]):
-        return "kyc_fraud"
-    if any(k in text_all for k in ["income tax", "tax refund", "itr"]):
-        return "tax_scam"
-    if any(k in text_all for k in ["electricity", "power bill", "disconnection"]):
-        return "utility_bill_scam"
-    if any(k in text_all for k in ["sbi", "hdfc", "icici", "axis", "bank", "account"]):
-        return "bank_fraud"
-    return "generic_scam"
-# ============================
-# CALLBACK (STRICT FORMAT)
-# ============================
-def send_callback(session_id):
     conv = conversation_store[session_id]
-    engagement = compute_engagement_score(session_id)
     intel = intelligence_store[session_id]
-    scammer_count = len([m for m in conv if m["sender"] == "scammer"])
-    duration_seconds = max(240, scammer_count * 24)
-    conf_values = confidence_store.get(session_id, [])
-    if conf_values:
-        avg_conf = sum(conf_values) / len(conf_values)
-    else:
-        avg_conf = 0.7
-    if avg_conf >= 0.8:
-        confidence_level = "HIGH"
-    elif avg_conf >= 0.5:
-        confidence_level = "MEDIUM"
-    else:
-        confidence_level = "LOW"
     payload = {
-        "status": "success",
         "sessionId": session_id,
         "scamDetected": True,
         "totalMessagesExchanged": len(conv),
         "engagementDurationSeconds": duration_seconds,
-        "scamType": infer_scam_type(session_id),
-        "confidenceLevel": confidence_level,
-        "extractedIntelligence": {
-            "phoneNumbers": intel["phoneNumbers"],
-            "bankAccounts": intel["bankAccounts"],
-            "upiIds": intel["upiIds"],
-            "phishingLinks": intel["phishingLinks"],
-            "emailAddresses": intel["emailAddresses"],
-            "caseIds": intel.get("caseIds", []),
-            "policyNumbers": intel.get("policyNumbers", []),
-            "orderNumbers": intel.get("orderNumbers", []),
-        },
-        "engagementMetrics": {
-            "totalMessagesExchanged": len(conv),
-            "engagementDurationSeconds": duration_seconds,
-            "engagementScore": round(engagement)
-        },
-        "agentNotes": "Adaptive psychological engagement used to prolong conversation."
     }
     try:
         requests.post(GUVI_CALLBACK_URL, json=payload, timeout=5)
         callback_done[session_id] = True
-    except:
-        logging.warning("Callback failed")
-# ============================
-# ROUTES
-# ============================
-@app.route("/", methods=["GET"])
-def index():
-    return "Honeypot API is running", 200
-@app.route("/honeypot", methods=["POST"])
 @app.route("/honeypot/message", methods=["POST"])
 def honeypot_message():
@@ -404,7 +238,8 @@ def honeypot_message():
         return jsonify({"error": "Unauthorized"}), 401
     data = request.get_json()
-    session_id = data.get("sessionId", "default")
     text = data["message"]["text"]
     if session_id not in conversation_store:
@@ -417,41 +252,42 @@ def honeypot_message():
             "emailAddresses": [],
             "caseIds": [],
             "policyNumbers": [],
-            "orderNumbers": [],
         }
-        callback_done[session_id] = False
         confidence_store[session_id] = []
     conversation_store[session_id].append({"sender": "scammer", "text": text})
-    scam, conf = detect_scam(text)
-    confidence_store[session_id].append(conf)
-    intel = extract_intelligence(text)
-    for k in intel:
         intelligence_store[session_id][k] = list(
-            set(intelligence_store[session_id][k] + intel[k])
         )
     reply = generate_agent_reply(session_id)
     conversation_store[session_id].append({"sender": "agent", "text": reply})
-    if scam and not callback_done[session_id]:
-        scammer_msgs = [m for m in conversation_store[session_id] if m["sender"] == "scammer"]
-        if len(scammer_msgs) >= MIN_MESSAGES_FOR_CALLBACK:
-            send_callback(session_id)
-    engagement = compute_engagement_score(session_id)
     return jsonify({
         "status": "success",
-        "scamDetected": scam,
-        "confidence": round(conf, 3),
-        "reply": reply,
-        "engagementScore": round(engagement)
     })
 if __name__ == "__main__":
     port = int(os.getenv("PORT", "8000"))
-    app.run(host="0.0.0.0", port=port)

 from flask import Flask, request, jsonify
+import torch
+import re
+import requests
+import random
+import time
+import os
+import logging
 from transformers import BertTokenizer, BertForSequenceClassification
+from dotenv import load_dotenv
+# ======================================================
+# CONFIGURATION
+# ======================================================
+load_dotenv()
 API_KEY = os.getenv("HONEYPOT_API_KEY")
 GUVI_CALLBACK_URL = "https://hackathon.guvi.in/api/updateHoneyPotFinalResult"
+MIN_TURNS_REQUIRED = 8  # ensures full Turn Count score
+MAX_TURNS = 10
 logging.basicConfig(level=logging.INFO)
 conversation_store = {}
 intelligence_store = {}
 confidence_store = {}
+callback_done = {}
+session_meta = {}
+# ======================================================
+# API KEY VERIFICATION
+# ======================================================
 def verify_api_key(req):
     """Return True only when the request carries the configured API key.

     Args:
         req: Request-like object exposing a ``headers`` mapping that
             supports ``.get``.

     Returns:
         bool: True when the ``x-api-key`` header equals the module-level
         ``API_KEY``; False when either value is missing or they differ.
     """
     # Function-scope import so the block stays runnable without touching the
     # file's import section.
     import hmac

     provided = req.headers.get("x-api-key")
     # Fail closed: API_KEY comes from os.getenv and is None when the variable
     # is unset; a request without the header would otherwise pass because
     # None == None.
     if not API_KEY or not provided:
         return False
     # Constant-time comparison; encode first because compare_digest rejects
     # non-ASCII str arguments.
     return hmac.compare_digest(
         provided.encode("utf-8"), API_KEY.encode("utf-8")
     )
+# ======================================================
+# SCAM DETECTION (GENERIC)
+# ======================================================
 def detect_scam(text):
+    generic_keywords = [
+        "otp", "urgent", "verify", "account blocked",
         "lottery", "loan approved", "refund",
+        "processing fee", "upi", "click here",
+        "disconnection", "kyc", "tax refund"
     ]
+    keyword_flag = any(k in text.lower() for k in generic_keywords)
     try:
         inputs = phish_tokenizer(
         inputs = {k: v.to(device) for k, v in inputs.items()}
         with torch.no_grad():
+            outputs = phish_model(**inputs)
+        probs = torch.softmax(outputs.logits, dim=1)[0]
         pred = torch.argmax(probs).item()
+        confidence = probs[pred].item()
+        scam_flag = (pred == 1) or keyword_flag
+        return scam_flag, float(confidence)
+    except Exception as e:
+        logging.warning(f"Detection error: {e}")
         return keyword_flag, 0.7
+# ======================================================
+# INTELLIGENCE EXTRACTION
+# ======================================================
 def extract_intelligence(text):
     """Extract scam-related identifiers from one message using regexes.

     Args:
         text: Message text to scan.

     Returns:
         dict[str, list[str]]: One entry per category (phoneNumbers,
         bankAccounts, upiIds, phishingLinks, emailAddresses, caseIds,
         policyNumbers, orderNumbers). Each list holds the distinct matches
         in first-seen order and is empty when nothing matched.

     Note:
         Categories are matched independently, so a 12-13 digit run can be
         reported both as a phone number (country code + 10 digits) and as a
         bank account.
     """
     patterns = {
         # Country code is optional so bare 10-digit numbers are caught, and
         # the left guard is a lookbehind because "\b" can never sit in front
         # of "+" (both neighbours would be non-word characters).
         "phoneNumbers": r"(?<!\w)(?:\+?\d{1,3}[- ]?)?\d{10}\b",
         "bankAccounts": r"\b\d{12,18}\b",
         # The lookahead rejects handles that continue as a domain
         # ("user@gmail.com"), which belong to emailAddresses, while still
         # accepting "user@ybl" at the end of a sentence.
         "upiIds": r"[a-zA-Z0-9.\-_+]+@[a-zA-Z]+(?![a-zA-Z0-9\-]|\.[a-zA-Z])",
         "phishingLinks": r"https?://[^\s]+",
         "emailAddresses": r"[a-zA-Z0-9.\-_+]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]+",
         "caseIds": r"\b(?:CASE|REF|ID|TICKET)[- ]?[A-Z0-9]{4,}\b",
         "policyNumbers": r"\b(?:POLICY|POL|INS)[- ]?[A-Z0-9]{4,}\b",
         "orderNumbers": r"\b(?:ORDER|ORD)[- ]?[A-Z0-9]{4,}\b",
     }
     # dict.fromkeys de-duplicates while keeping first-seen order, so the
     # result is deterministic (a set would not be).
     return {
         key: list(dict.fromkeys(re.findall(pattern, text)))
         for key, pattern in patterns.items()
     }
+# ======================================================
+# HUMAN-LIKE CONVERSATION ENGINE
+# ======================================================
 def generate_agent_reply(session_id):
     """Compose the honeypot persona's next reply for a session.

     The reply combines an emotional opener chosen by how many scammer
     messages the session holds (<=2 confused, <=5 concerned, <=8 skeptical,
     otherwise firm), optionally a remark about a red flag found in the
     latest scammer message, and one investigative question. The choices are
     random, so the output is not deterministic.

     Args:
         session_id: Key into the module-level ``conversation_store``.

     Returns:
         str: A single-line reply that ends with "?".

     Raises:
         KeyError: If ``session_id`` is not in ``conversation_store``.
     """
     history = conversation_store[session_id]
     scammer_msgs = [m for m in history if m["sender"] == "scammer"]
     turn = len(scammer_msgs)
     # Guard against a history with no scammer message yet; indexing [-1]
     # on an empty list would raise IndexError.
     last_text = scammer_msgs[-1]["text"].lower() if scammer_msgs else ""

     # Escalation phases: the persona grows more doubtful as turns accumulate.
     if turn <= 2:
         phase = "confused"
     elif turn <= 5:
         phase = "concerned"
     elif turn <= 8:
         phase = "skeptical"
     else:
         phase = "firm"

     emotional_map = {
         "confused": [
             "I am not fully understanding this.",
             "This is confusing to me.",
         ],
         "concerned": [
             "I am worried about my account.",
             "This situation feels risky.",
         ],
         "skeptical": [
             "Something does not feel right here.",
             "I am starting to doubt this.",
         ],
         "firm": [
             "Before I proceed, I need proper proof.",
             "I will not share anything without verification.",
         ],
     }

     # (keyword, remark) pairs; a remark is eligible when its keyword occurs
     # in the latest scammer message.
     red_flag_remarks = (
         ("otp", "You are asking for my OTP which is extremely sensitive."),
         ("urgent", "You are creating urgency which is suspicious."),
         ("fee", "Why is there a fee before resolving this?"),
         ("link", "The link you shared looks suspicious."),
         ("upi", "I am unsure about this UPI ID."),
     )
     red_flags = [
         remark for keyword, remark in red_flag_remarks if keyword in last_text
     ]

     opener = random.choice(emotional_map[phase])
     flag_statement = random.choice(red_flags) if red_flags else ""

     investigative_questions = [
         "Can you provide your official employee ID?",
         "What is your branch location?",
         "Can you share your direct contact number?",
         "Is there an official website I can verify?",
         "What is the reference or case ID?",
         # Phrased as a question: the earlier "Please resend ... clearly."
         # ended in a period, so the trailing-"?" guard below produced
         # "clearly.?".
         "Can you resend the full bank account details clearly?",
         "What is the registered company name?",
     ]
     question = random.choice(investigative_questions)

     structure_type = random.choice(["short", "medium", "long"])
     if structure_type == "short":
         reply = f"{opener} {question}"
     elif structure_type == "medium":
         reply = f"{opener} {flag_statement} {question}"
     else:
         reply = f"{opener} {flag_statement} If this is genuine, why is this different from standard procedure? {question}"

     # An empty flag_statement leaves a double space; collapse whitespace.
     reply = re.sub(r"\s+", " ", reply).strip()
     # Safety net so the reply always ends as a question.
     if not reply.endswith("?"):
         reply += "?"

     # Short random pause before answering, kept from the existing behaviour
     # (presumably to mimic human response latency).
     time.sleep(random.uniform(0.3, 0.8))
     return reply
+# ======================================================
+# FINAL OUTPUT SUBMISSION
+# ======================================================
def send_final_output(session_id):
    """Submit the session's final result to the callback endpoint.

    Builds the payload from the module-level stores and POSTs it to
    ``GUVI_CALLBACK_URL``. ``callback_done[session_id]`` is set to True only
    when the endpoint answers with a non-error HTTP status, so a rejected or
    failed submission can be attempted again on a later call.

    Args:
        session_id: Key into ``conversation_store``, ``intelligence_store``
            and ``session_meta``.

    Raises:
        KeyError: If ``session_id`` is missing from any of those stores.
    """
    conv = conversation_store[session_id]
    intel = intelligence_store[session_id]
    duration_seconds = int(time.time() - session_meta[session_id]["start"])
    payload = {
        "sessionId": session_id,
        "scamDetected": True,
        "totalMessagesExchanged": len(conv),
        "engagementDurationSeconds": duration_seconds,
        "extractedIntelligence": intel,
        "agentNotes": "Scammer used urgency, identity claims, payment redirection and sensitive data requests."
    }
    try:
        response = requests.post(GUVI_CALLBACK_URL, json=payload, timeout=5)
        # Without this, a 4xx/5xx answer would be recorded as a successful
        # submission and never retried.
        response.raise_for_status()
        callback_done[session_id] = True
    except Exception as e:
        # Best-effort by design: a failed callback is logged and must not
        # break the caller.
        logging.warning(f"Callback error: {e}")
+# ======================================================
+# ROUTE
+# ======================================================
 @app.route("/honeypot/message", methods=["POST"])
 def honeypot_message():
         return jsonify({"error": "Unauthorized"}), 401
     data = request.get_json()
+    session_id = data["sessionId"]
     text = data["message"]["text"]
     if session_id not in conversation_store:
             "emailAddresses": [],
             "caseIds": [],
             "policyNumbers": [],
+            "orderNumbers": []
         }
         confidence_store[session_id] = []
+        callback_done[session_id] = False
+        session_meta[session_id] = {"start": time.time()}
     conversation_store[session_id].append({"sender": "scammer", "text": text})
+    scam, confidence = detect_scam(text)
+    confidence_store[session_id].append(confidence)
+    extracted = extract_intelligence(text)
+    for k in extracted:
         intelligence_store[session_id][k] = list(
+            set(intelligence_store[session_id][k] + extracted[k])
         )
     reply = generate_agent_reply(session_id)
     conversation_store[session_id].append({"sender": "agent", "text": reply})
+    scammer_turns = len([m for m in conversation_store[session_id] if m["sender"] == "scammer"])
+    if scam and not callback_done[session_id] and scammer_turns >= MIN_TURNS_REQUIRED:
+        send_final_output(session_id)
     return jsonify({
         "status": "success",
+        "reply": reply
     })
+# ======================================================
+# RUN
+# ======================================================
 if __name__ == "__main__":
     port = int(os.getenv("PORT", "8000"))
+    app.run(host="0.0.0.0", port=port)