| from fastapi import FastAPI |
| from pydantic import BaseModel |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification |
| import torch |
|
|
# ASGI application object; the route decorators in this module attach to it.
app = FastAPI()

# Identifier handed to both ``from_pretrained`` calls below.
MODEL_ID = "ealvaradob/bert-finetuned-phishing"

# The tokenizer and classifier are loaded once, at import time, and then
# shared by every request handled by this process.
print("Loading model... This might take a minute as it's a 'large' BERT model.")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
|
|
class URLInput(BaseModel):
    """Request body accepted by the ``/predict`` endpoint."""

    # The URL string to classify.
    url: str
|
|
@app.get("/")
async def root():
    """Return a static status payload showing the API is reachable."""
    status_payload = {"status": "URL Phishing Detector API is running"}
    return status_payload
|
|
def _is_phishing_label(label_name):
    """Return True when a model label name denotes the phishing class."""
    # Substring match: a label containing the digit "1" (e.g. "LABEL_1") or
    # the word "phishing" in any letter case counts as phishing.
    return "1" in label_name or "phishing" in label_name.lower()


@app.post("/predict")
async def predict_url(data: URLInput):
    """Classify the submitted URL as phishing or legitimate.

    Args:
        data: Request body carrying the URL string to score.

    Returns:
        ``{"error": "Invalid URL provided"}`` when the URL is empty,
        whitespace-only, or shorter than four characters once surrounding
        whitespace is trimmed. Otherwise a dict with:

        * ``url`` - the URL exactly as submitted,
        * ``prediction`` - ``"phishing"`` or ``"legitimate"``,
        * ``confidence`` - probability of the top label, rounded to 4 places,
        * ``raw_scores`` - mapping of every model label to its probability,
        * ``is_malicious`` - boolean form of ``prediction``.
    """
    # Trim before validating so whitespace-only or whitespace-padded input
    # cannot slip past the length check and be scored as a URL.
    url = (data.url or "").strip()
    if len(url) < 4:
        return {"error": "Invalid URL provided"}

    inputs = tokenizer(url, return_tensors="pt", truncation=True, max_length=512)

    # NOTE(review): the forward pass runs synchronously inside this coroutine,
    # so the event loop serves no other request until it returns. Consider
    # offloading it to a worker thread if request latency becomes a problem.
    with torch.no_grad():
        outputs = model(**inputs)

    # Softmax over the last dimension turns logits into per-label
    # probabilities; index 0 selects the single sequence that was submitted.
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)[0].tolist()
    confidences = {model.config.id2label[i]: prob for i, prob in enumerate(probs)}

    # Highest-probability label and its score.
    label_name, score = max(confidences.items(), key=lambda item: item[1])
    is_phishing = _is_phishing_label(label_name)

    return {
        "url": data.url,
        "prediction": "phishing" if is_phishing else "legitimate",
        "confidence": round(score, 4),
        "raw_scores": confidences,
        "is_malicious": is_phishing,
    }
|
|
if __name__ == "__main__":
    # Executed directly as a script: serve the app with uvicorn.
    import uvicorn

    bind_host, bind_port = "0.0.0.0", 7860
    uvicorn.run(app, host=bind_host, port=bind_port)