Spaces:

Nick-2x
/

URL_PART

Sleeping

App Files Files Community

Nick-2x committed 19 days ago

Commit

103e422

verified ·

1 Parent(s): 9c8c464

Create app.py

Browse files

Files changed (1) hide show

app.py +60 -0

app.py ADDED Viewed

	@@ -0,0 +1,60 @@

+from fastapi import FastAPI
+from pydantic import BaseModel
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+app = FastAPI()
+# NEW MODEL: Multimodal Phishing Detector (URLs, SMS, Email)
+MODEL_ID = "ealvaradob/bert-finetuned-phishing"
+print("Loading model... This might take a minute as it's a 'large' BERT model.")
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
+# Request body schema for POST /predict.
+class URLInput(BaseModel):
+    # Text handed to the tokenizer by predict_url; must be a string.
+    url: str
+@app.get("/")
+async def root():
+    return {"status": "URL Phishing Detector API is running"}
+@app.post("/predict")
+async def predict_url(data: URLInput):
+    # 1. Basic Pre-check
+    if not data.url or len(data.url) < 4:
+        return {"error": "Invalid URL provided"}
+    # 2. Tokenize and Predict
+    inputs = tokenizer(data.url, return_tensors="pt", truncation=True, max_length=512)
+    with torch.no_grad():
+        outputs = model(**inputs)
+        # Apply Softmax to get percentages
+        predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
+    probs = predictions[0].tolist()
+    # 3. Dynamic Label Mapping
+    # The model usually uses LABEL_0 (Legitimate) and LABEL_1 (Phishing)
+    confidences = {model.config.id2label[i]: prob for i, prob in enumerate(probs)}
+    # Identify the highest confidence label
+    max_label = max(confidences.items(), key=lambda x: x[1])
+    label_name = max_label[0]
+    # Check for "LABEL_1" or "phishing" keyword in the output
+    is_phishing = "1" in label_name or "phishing" in label_name.lower()
+    return {
+        "url": data.url,
+        "prediction": "phishing" if is_phishing else "legitimate",
+        "confidence": round(max_label[1], 4),
+        "raw_scores": confidences,
+        "is_malicious": is_phishing
+    }
+if __name__ == "__main__":
+    import uvicorn
+    # 7860 is the standard port for Hugging Face Spaces
+    uvicorn.run(app, host="0.0.0.0", port=7860)