# nids-deployment / app.py
# Uploaded by Alaudeen via huggingface_hub (commit 70fd4ab, verified)
"""Hugging Face Spaces app for NIDS deployment.
This app downloads pre-trained models from Hugging Face Hub and serves
a Gradio interface for real-time network intrusion detection.
"""
import os
import sys
import json
import numpy as np
import joblib
import gradio as gr
# Hugging Face Hub repository that stores the pre-trained detectors.
MODELS_REPO = "Alaudeen/nids-models"
# Local directory models are downloaded into and loaded from.
MODELS_DIR = "outputs/models"
os.makedirs(MODELS_DIR, exist_ok=True)

# Global model registry, populated by load_models():
# name -> fitted sklearn/xgboost estimator (.joblib) or checkpoint path (.pt).
models = {}
def download_models():
    """Fetch any missing model files from the Hugging Face Hub.

    Files already present under ``MODELS_DIR`` are skipped. When
    ``huggingface_hub`` is not installed, the function logs a notice and
    returns, assuming the models already exist locally. Per-file download
    failures are logged and do not abort the remaining downloads.
    """
    try:
        from huggingface_hub import hf_hub_download
    except ImportError:
        print("huggingface_hub not installed, models must exist locally")
        return

    wanted = (
        "XGBoost.joblib",
        "RandomForest.joblib",
        "IsolationForest_Unsupervised.joblib",
        "MLP.pt",
        "LSTM.pt",
        "Transformer.pt",
        "Autoencoder.pt",
    )
    for fname in wanted:
        # Skip files that are already cached locally.
        if os.path.exists(os.path.join(MODELS_DIR, fname)):
            continue
        try:
            print(f"Downloading {fname} from {MODELS_REPO}...")
            hf_hub_download(
                repo_id=MODELS_REPO,
                filename=fname,
                repo_type="model",
                local_dir=MODELS_DIR,
                local_dir_use_symlinks=False
            )
            print(f" Downloaded: {fname}")
        except Exception as e:
            # Best-effort: a missing model simply won't appear in the UI.
            print(f" Failed to download {fname}: {e}")
def load_models():
    """Rebuild the global ``models`` registry from files in ``MODELS_DIR``.

    ``.joblib`` files are deserialized immediately into estimators;
    ``.pt`` checkpoints are registered as bare file paths to be loaded
    on demand. Files that fail to deserialize are logged and skipped.
    """
    global models
    models.clear()
    for fname in sorted(os.listdir(MODELS_DIR)):
        full_path = os.path.join(MODELS_DIR, fname)
        if fname.endswith(".joblib"):
            key = fname.replace(".joblib", "")
            try:
                models[key] = joblib.load(full_path)
            except Exception as err:
                print(f"Failed to load {key}: {err}")
            else:
                print(f"Loaded: {key}")
        elif fname.endswith(".pt"):
            key = fname.replace(".pt", "")
            # Torch checkpoints are not deserialized here; store the path only.
            models[key] = full_path
            print(f"Found: {key}")
# Startup: fetch any missing model files, then load everything found on disk.
print("Initializing NIDS Space...")
download_models()
load_models()
print(f"Models available: {list(models.keys())}")

# Sample NSL-KDD flows used to pre-fill the UI textboxes.
SAMPLE_NORMAL = [0, 1, 45, 0, 491, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2, 2, 0.0, 0.0,
0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.17, 0.03]
SAMPLE_ANOMALY = [0, 1, 44, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 123, 6, 1.0, 1.0,
0.0, 0.0, 0.05, 0.07, 0.0, 0.0, 0.1, 0.05]
# Truncate to exactly 41 features (the literals above contain 42 values;
# the slice drops the trailing extra so they match the model input width).
SAMPLE_NORMAL = SAMPLE_NORMAL[:41]
SAMPLE_ANOMALY = SAMPLE_ANOMALY[:41]
def detect_single(features_text: str, model_name: str) -> str:
    """Classify one network flow and return a human-readable alert string.

    Args:
        features_text: 41 comma-separated numeric NSL-KDD feature values.
        model_name: Key into the global ``models`` registry (populated at
            startup by ``load_models``).

    Returns:
        An alert-level string on success, or an error message when the
        input cannot be parsed or the model is unavailable.
    """
    try:
        features = [float(x.strip()) for x in features_text.split(",")]
    except ValueError as e:  # float() on a non-numeric token
        return f"❌ Error parsing features: {e}"
    if len(features) != 41:
        return f"❌ Expected 41 features, got {len(features)}"
    if model_name not in models:
        return f"❌ Model '{model_name}' not available. Loaded: {list(models.keys())}"

    model = models[model_name]
    # Torch checkpoints are registered as bare file paths, not estimators.
    if not hasattr(model, "predict"):
        return f"⚠️ Model {model_name} not loaded (path only)"

    X = np.array(features).reshape(1, -1)
    pred = int(model.predict(X)[0])
    # BUGFIX: sklearn's IsolationForest predicts +1 = inlier / -1 = outlier.
    # The previous code treated its inliers (+1) as threats and indexed
    # proba[-1] for outliers. Normalize to 0 = normal / 1 = anomaly.
    if "IsolationForest" in model_name:
        pred = 1 if pred == -1 else 0
    proba = model.predict_proba(X)[0] if hasattr(model, "predict_proba") else [0.5, 0.5]
    confidence = float(proba[pred])

    if pred == 0:
        return "✅ SAFE — Normal Traffic"
    if confidence > 0.9:
        return f"🔴 CRITICAL THREAT — Anomaly Detected (Confidence: {confidence:.1%})"
    if confidence > 0.75:
        return f"🟠 HIGH THREAT — Anomaly Detected (Confidence: {confidence:.1%})"
    return f"🟡 MEDIUM THREAT — Suspicious Activity (Confidence: {confidence:.1%})"
def detect_batch(batch_text: str, model_name: str) -> str:
    """Run detection over many flows and summarize the results as Markdown.

    Args:
        batch_text: Newline-separated flows; each line must contain exactly
            41 comma-separated numeric values. Malformed or wrong-length
            lines are silently skipped (best-effort batch behavior).
        model_name: Key into the global ``models`` registry.

    Returns:
        A Markdown summary of normal vs anomalous counts, or an error
        message when no valid flows or no usable model is available.
    """
    flows = []
    for line in batch_text.strip().split("\n"):
        line = line.strip()
        if not line:
            continue
        try:
            vals = [float(x.strip()) for x in line.split(",")]
        except ValueError:  # narrow: only float() parsing can fail here
            continue
        if len(vals) == 41:
            flows.append(vals)
    if not flows:
        return "❌ No valid 41-feature flows found."
    if model_name not in models or not hasattr(models[model_name], "predict"):
        return f"❌ Model '{model_name}' not available."

    model = models[model_name]
    preds = np.asarray(model.predict(np.array(flows)))
    # BUGFIX: sklearn's IsolationForest predicts +1 = inlier / -1 = outlier,
    # so the previous `preds == 0` / `preds == 1` counts were wrong for it.
    # Normalize to 0 = normal / 1 = anomaly.
    if "IsolationForest" in model_name:
        preds = np.where(preds == -1, 1, 0)
    total = len(flows)
    normals = int(np.sum(preds == 0))
    anomalies = int(np.sum(preds == 1))
    return (
        f"**Batch Detection Results**\n\n"
        f"- Total Flows: {total}\n"
        f"- ✅ Normal: {normals} ({normals/total*100:.1f}%)\n"
        f"- 🚨 Anomalies: {anomalies} ({anomalies/total*100:.1f}%)\n"
        f"- Model: {model_name}"
    )
def show_results() -> str:
    """Return a static Markdown table of offline evaluation metrics.

    The numbers are hard-coded results from training on NSL-KDD; this
    function performs no computation at call time.
    """
    report = """
## Model Performance (NSL-KDD Dataset)
| Model | Accuracy | Macro F1 | AUC-ROC | Type |
|-------|----------|----------|---------|------|
| **XGBoost** | 76.18% | 76.04% | 95.75% | Supervised |
| RandomForest | 73.10% | 73.05% | 95.34% | Supervised |
| MLP | 73.28% | 73.21% | 89.33% | Supervised |
| Autoencoder | 71.84% | 71.34% | 73.60% | Unsupervised |
| LSTM | 70.65% | 70.58% | 87.80% | Unsupervised |
| Transformer | 57.94% | 57.26% | 80.29% | Supervised |
| IsolationForest | 56.55% | 55.96% | 65.24% | Unsupervised |
**Key Insight:** XGBoost achieves the best performance (95.75% AUC-ROC) and runs at ~1ms latency per flow, making it ideal for real-time deployment.
"""
    return report
# ---------------------------------------------------------------------------
# Build the Gradio interface: four tabs (single flow, batch, performance,
# API docs) plus a footer, then launch when run as a script.
# NOTE(review): several labels below contain mojibake (e.g. "πŸ›‘οΈ") from a
# prior encoding round-trip; they are runtime strings and are left untouched.
# ---------------------------------------------------------------------------
with gr.Blocks(title="πŸ›‘οΈ Network Intrusion Detection System") as demo:
    gr.Markdown("""
# πŸ›‘οΈ Network Intrusion Detection System (NIDS)
Detect network intrusions in real-time using ML models trained on the **NSL-KDD** dataset.
Enter 41 comma-separated network flow features to classify as **Normal** or **Anomaly**.
**Models from:** [Alaudeen/nids-models](https://huggingface.co/Alaudeen/nids-models)
""")
    with gr.Tab("πŸ” Single Flow Detection"):
        with gr.Row():
            with gr.Column(scale=2):
                # Pre-filled with the anomalous sample so the demo fires an alert.
                feature_input = gr.Textbox(
                    label="Flow Features (41 comma-separated values)",
                    value=",".join(map(str, SAMPLE_ANOMALY)),
                    lines=2,
                    placeholder="Enter 41 NSL-KDD features..."
                )
                model_choice = gr.Dropdown(
                    # Fall back to default names if no models loaded at startup.
                    choices=list(models.keys()) if models else ["XGBoost", "RandomForest"],
                    value="XGBoost",
                    label="Detection Model",
                    info="XGBoost is recommended (best accuracy + speed)"
                )
                detect_btn = gr.Button("πŸ” Detect Intrusion", variant="primary", size="lg")
            with gr.Column(scale=1):
                result = gr.Textbox(
                    label="Detection Result",
                    lines=4,
                    interactive=False
                )
                gr.Markdown("""
**Alert Levels:**
- 🟒 **Safe** β€” Normal traffic
- 🟑 **Medium** β€” Suspicious activity
- 🟠 **High** β€” Likely intrusion
- πŸ”΄ **Critical** β€” Confirmed attack
""")
        # Wire the detect button to the single-flow classifier.
        detect_btn.click(
            detect_single,
            inputs=[feature_input, model_choice],
            outputs=result
        )
        with gr.Row():
            # Convenience buttons: overwrite the textbox with a sample flow.
            gr.Button("πŸ“‹ Load Normal Sample").click(
                lambda: ",".join(map(str, SAMPLE_NORMAL)),
                outputs=feature_input
            )
            gr.Button("⚠️ Load Anomaly Sample").click(
                lambda: ",".join(map(str, SAMPLE_ANOMALY)),
                outputs=feature_input
            )
    with gr.Tab("πŸ“Š Batch Detection"):
        with gr.Row():
            with gr.Column(scale=2):
                batch_input = gr.Textbox(
                    label="Batch Flows (one per line, 41 values each)",
                    value=",".join(map(str, SAMPLE_NORMAL)) + "\n" +
                    ",".join(map(str, SAMPLE_ANOMALY)),
                    lines=8
                )
                batch_model = gr.Dropdown(
                    choices=list(models.keys()) if models else ["XGBoost", "RandomForest"],
                    value="XGBoost",
                    label="Model"
                )
                batch_btn = gr.Button("πŸ“Š Batch Detect", variant="primary")
            with gr.Column(scale=1):
                batch_result = gr.Markdown(label="Results")
        batch_btn.click(detect_batch, inputs=[batch_input, batch_model], outputs=batch_result)
    with gr.Tab("πŸ“ˆ Model Performance"):
        # Static metrics table rendered once at interface-build time.
        gr.Markdown(show_results())
    with gr.Tab("πŸ“– API Documentation"):
        gr.Markdown("""
## REST API Usage
Deploy the FastAPI server locally:
```bash
pip install fastapi uvicorn
uvicorn api:app --host 0.0.0.0 --port 8000
```
### Endpoints
| Endpoint | Method | Description |
|----------|--------|-------------|
| `/health` | GET | Health check |
| `/models` | GET | List available models |
| `/predict` | POST | Single flow detection |
| `/predict/batch` | POST | Batch detection |
| `/stats` | GET | Usage statistics |
| `/sample` | GET | Sample flows |
### Example Request
```bash
curl -X POST http://localhost:8000/predict \\
-H "Content-Type: application/json" \\
-d '{
"features": [0,1,45,0,491,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,2,2,0,0,0,0,1,0,0,0,0.17],
"model": "XGBoost"
}'
```
### Example Response
```json
{
"flow_id": "flow_1",
"prediction": 1,
"confidence": 0.9634,
"model": "XGBoost",
"latency_ms": 2.77,
"alert_level": "critical",
"timestamp": 1778206436.82
}
```
""")
    gr.Markdown("""
---
**Project:** [github.com/Alaudeen/nids](https://huggingface.co/Alaudeen/nids-models) |
**Dataset:** [Mireu-Lab/NSL-KDD](https://huggingface.co/datasets/Mireu-Lab/NSL-KDD) |
**License:** MIT
""")

if __name__ == "__main__":
    demo.launch()