Alaudeen committed on
Commit
70fd4ab
·
verified ·
1 Parent(s): 4b78293

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +318 -0
app.py ADDED
@@ -0,0 +1,318 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Hugging Face Spaces app for NIDS deployment.
2
+
3
+ This app downloads pre-trained models from Hugging Face Hub and serves
4
+ a Gradio interface for real-time network intrusion detection.
5
+ """
6
+
7
+ import os
8
+ import sys
9
+ import json
10
+ import numpy as np
11
+ import joblib
12
+ import gradio as gr
13
+
14
# Hub repository that hosts the trained model artifacts.
MODELS_REPO = "Alaudeen/nids-models"

# Local folder the artifacts are downloaded into and loaded from; it is
# created at import time so later directory listings never fail.
MODELS_DIR = "outputs/models"
os.makedirs(MODELS_DIR, exist_ok=True)

# Registry filled by load_models(): model name -> loaded estimator for
# ``.joblib`` files, or the file path for ``.pt`` files.
models = dict()
20
+
21
+
22
def download_models():
    """Fetch any missing model artifacts from the Hugging Face Hub.

    Each expected file is looked up under ``MODELS_DIR``; files that are
    absent are downloaded from ``MODELS_REPO``. The step is best-effort:
    a missing ``huggingface_hub`` package or a failed download is reported
    on stdout and never raises, so the app can still start with whatever
    models already exist locally.
    """
    try:
        from huggingface_hub import hf_hub_download
    except ImportError:
        print("huggingface_hub not installed, models must exist locally")
        return

    model_files = (
        "XGBoost.joblib",
        "RandomForest.joblib",
        "IsolationForest_Unsupervised.joblib",
        "MLP.pt",
        "LSTM.pt",
        "Transformer.pt",
        "Autoencoder.pt",
    )

    for fname in model_files:
        local_path = os.path.join(MODELS_DIR, fname)
        if os.path.exists(local_path):
            continue  # already present, nothing to fetch

        print(f"Downloading {fname} from {MODELS_REPO}...")
        try:
            # ``local_dir_use_symlinks`` is deliberately not passed: the
            # argument is deprecated in huggingface_hub, and because every
            # exception is reported as a failed download below, an
            # unsupported keyword would silently disable all downloads.
            # NOTE(review): confirm against the huggingface_hub version
            # pinned for this Space.
            hf_hub_download(
                repo_id=MODELS_REPO,
                filename=fname,
                repo_type="model",
                local_dir=MODELS_DIR,
            )
        except Exception as e:  # best-effort: keep going with other files
            print(f" Failed to download {fname}: {e}")
        else:
            print(f" Downloaded: {fname}")
55
+
56
+
57
def load_models():
    """Rebuild the module-level ``models`` registry from ``MODELS_DIR``.

    The registry is emptied first, then every file in the directory is
    visited in sorted order:

    * ``*.joblib`` files are deserialized with ``joblib.load`` and stored
      under the file name without its extension; a file that fails to load
      is reported on stdout and skipped.
    * ``*.pt`` files are not loaded; only their path is stored.

    Other files are ignored.
    """
    # The dict is mutated in place, so no ``global`` declaration is needed.
    models.clear()

    joblib_ext = ".joblib"
    torch_ext = ".pt"

    for fname in sorted(os.listdir(MODELS_DIR)):
        path = os.path.join(MODELS_DIR, fname)

        if fname.endswith(joblib_ext):
            # Strip only the trailing extension; ``str.replace`` would also
            # remove any earlier ".joblib" inside the file name.
            name = fname[: -len(joblib_ext)]
            try:
                # NOTE(security): joblib.load unpickles the file, which can
                # execute arbitrary code. Only load artifacts from a
                # repository you trust.
                models[name] = joblib.load(path)
            except Exception as e:
                print(f"Failed to load {name}: {e}")
            else:
                print(f"Loaded: {name}")
        elif fname.endswith(torch_ext):
            name = fname[: -len(torch_ext)]
            models[name] = path  # Store path, load on-demand
            print(f"Found: {name}")
74
+
75
+
76
# Start-up sequence: fetch whatever model files are missing, then register
# everything found on disk and report the resulting model names.
print("Initializing NIDS Space...")
download_models()
load_models()
available_models = list(models.keys())
print(f"Models available: {available_models}")
81
+
82
# Sample flows used to pre-fill the text boxes and the "Load ... Sample"
# buttons in the UI.
SAMPLE_NORMAL = [0, 1, 45, 0, 491, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 1, 1, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2, 2, 0.0, 0.0,
                 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.17, 0.03]

SAMPLE_ANOMALY = [0, 1, 44, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                  1, 1, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 123, 6, 1.0, 1.0,
                  0.0, 0.0, 0.05, 0.07, 0.0, 0.0, 0.1, 0.05]

# Truncate (not pad) to the 41 features the detectors accept: each literal
# above holds 42 values, so the trailing one is discarded here.
# NOTE(review): confirm the 42nd value is the one that should be dropped.
SAMPLE_NORMAL = SAMPLE_NORMAL[:41]
SAMPLE_ANOMALY = SAMPLE_ANOMALY[:41]
94
+
95
+
96
def detect_single(features_text: str, model_name: str) -> str:
    """Classify one network flow and return a human-readable verdict.

    Args:
        features_text: 41 comma-separated numeric feature values.
        model_name: Key of the model to use in the module-level ``models``
            registry.

    Returns:
        A status line. Input or model problems are reported as a message
        starting with an error/warning marker instead of raising. A
        prediction of ``0`` is reported as safe; any other label is
        reported as a threat whose severity depends on the confidence
        (> 0.9 critical, > 0.75 high, otherwise medium).
    """
    try:
        features = [float(tok.strip()) for tok in features_text.split(",")]
    except (AttributeError, ValueError) as e:
        return f"❌ Error parsing features: {e}"

    if len(features) != 41:
        return f"❌ Expected 41 features, got {len(features)}"

    if model_name not in models:
        return f"❌ Model '{model_name}' not available. Loaded: {list(models.keys())}"

    model = models[model_name]
    # Entries without ``predict`` are bare file paths (``.pt`` models) that
    # were registered but never loaded.
    if not hasattr(model, "predict"):
        return f"⚠️ Model {model_name} not loaded (path only)"

    X = np.array(features).reshape(1, -1)
    try:
        pred = int(model.predict(X)[0])
        if hasattr(model, "predict_proba"):
            proba = model.predict_proba(X)[0]
        else:
            # No probability estimate available: report a neutral 50%.
            proba = [0.5, 0.5]
        # NOTE(review): indexing by the predicted label assumes labels are
        # the column indices 0/1. Estimators that use other label
        # conventions (e.g. -1/+1) would need an explicit mapping here.
        confidence = float(proba[pred])
    except Exception as e:
        # Surface model failures (shape mismatch, invalid values, label
        # outside the probability vector) as a message rather than an
        # unhandled error in the UI.
        return f"❌ Prediction failed with {model_name}: {e}"

    if pred == 0:
        return "✅ SAFE — Normal Traffic"
    if confidence > 0.9:
        return f"🔴 CRITICAL THREAT — Anomaly Detected (Confidence: {confidence:.1%})"
    if confidence > 0.75:
        return f"🟠 HIGH THREAT — Anomaly Detected (Confidence: {confidence:.1%})"
    return f"🟡 MEDIUM THREAT — Suspicious Activity (Confidence: {confidence:.1%})"
129
+
130
+
131
def detect_batch(batch_text: str, model_name: str) -> str:
    """Classify several flows at once and return a Markdown summary.

    Args:
        batch_text: One flow per line, each with 41 comma-separated numeric
            values. Blank lines are ignored; lines that are not 41 numeric
            values are skipped and counted.
        model_name: Key of the model to use in the module-level ``models``
            registry; it must refer to a loaded estimator with ``predict``.

    Returns:
        A Markdown report with the total, normal and anomalous flow counts,
        or an error message when no valid flow or usable model is found.
        Label ``0`` counts as normal and every other label as an anomaly,
        which matches how ``detect_single`` interprets predictions.
    """
    flows = []
    skipped = 0
    for raw_line in batch_text.strip().split("\n"):
        line = raw_line.strip()
        if not line:
            continue
        try:
            vals = [float(tok.strip()) for tok in line.split(",")]
        except ValueError:
            skipped += 1
            continue
        if len(vals) == 41:
            flows.append(vals)
        else:
            skipped += 1

    if not flows:
        return "❌ No valid 41-feature flows found."

    if model_name not in models or not hasattr(models[model_name], "predict"):
        return f"❌ Model '{model_name}' not available."

    model = models[model_name]
    try:
        preds = np.asarray(model.predict(np.array(flows)))
    except Exception as e:
        # Report model failures instead of letting them escape to the UI.
        return f"❌ Prediction failed with {model_name}: {e}"

    total = len(flows)
    normals = int(np.count_nonzero(preds == 0))
    # Everything that is not label 0 is an anomaly, so the two counts
    # always add up to the total even for models that do not emit 1.
    anomalies = total - normals

    report = (
        f"**Batch Detection Results**\n\n"
        f"- Total Flows: {total}\n"
        f"- ✅ Normal: {normals} ({normals/total*100:.1f}%)\n"
        f"- 🚨 Anomalies: {anomalies} ({anomalies/total*100:.1f}%)\n"
        f"- Model: {model_name}"
    )
    if skipped:
        # Make dropped input visible rather than silently ignoring it.
        report += f"\n- Skipped lines (not 41 numeric values): {skipped}"
    return report
163
+
164
+
165
def show_results() -> str:
    """Return the static Markdown report of model performance figures."""
    heading = "## Model Performance (NSL-KDD Dataset)"
    table_lines = (
        "| Model | Accuracy | Macro F1 | AUC-ROC | Type |",
        "|-------|----------|----------|---------|------|",
        "| **XGBoost** | 76.18% | 76.04% | 95.75% | Supervised |",
        "| RandomForest | 73.10% | 73.05% | 95.34% | Supervised |",
        "| MLP | 73.28% | 73.21% | 89.33% | Supervised |",
        "| Autoencoder | 71.84% | 71.34% | 73.60% | Unsupervised |",
        "| LSTM | 70.65% | 70.58% | 87.80% | Unsupervised |",
        "| Transformer | 57.94% | 57.26% | 80.29% | Supervised |",
        "| IsolationForest | 56.55% | 55.96% | 65.24% | Unsupervised |",
    )
    insight = (
        "**Key Insight:** XGBoost achieves the best performance (95.75% AUC-ROC) "
        "and runs at ~1ms latency per flow, making it ideal for real-time deployment."
    )
    # Leading and trailing empty entries reproduce the newline that opens
    # and closes the report.
    pieces = ["", heading, ""]
    pieces.extend(table_lines)
    pieces.extend(["", insight, ""])
    return "\n".join(pieces)
182
+
183
+
184
# Build Gradio interface

# Both dropdowns list every registered model, or two well-known names when
# nothing was found on disk.
_model_choices = list(models.keys()) if models else ["XGBoost", "RandomForest"]
# The preselected value has to be one of the choices. XGBoost can be absent
# (e.g. its download failed while other models loaded), so fall back to the
# first available entry in that case.
_default_model = "XGBoost" if "XGBoost" in _model_choices else _model_choices[0]

with gr.Blocks(title="🛡️ Network Intrusion Detection System") as demo:
    gr.Markdown("""
# 🛡️ Network Intrusion Detection System (NIDS)

Detect network intrusions in real-time using ML models trained on the **NSL-KDD** dataset.
Enter 41 comma-separated network flow features to classify as **Normal** or **Anomaly**.

**Models from:** [Alaudeen/nids-models](https://huggingface.co/Alaudeen/nids-models)
""")

    with gr.Tab("🔍 Single Flow Detection"):
        with gr.Row():
            with gr.Column(scale=2):
                feature_input = gr.Textbox(
                    label="Flow Features (41 comma-separated values)",
                    value=",".join(map(str, SAMPLE_ANOMALY)),
                    lines=2,
                    placeholder="Enter 41 NSL-KDD features...",
                )
                model_choice = gr.Dropdown(
                    choices=_model_choices,
                    value=_default_model,
                    label="Detection Model",
                    info="XGBoost is recommended (best accuracy + speed)",
                )
                detect_btn = gr.Button("🔍 Detect Intrusion", variant="primary", size="lg")

            with gr.Column(scale=1):
                result = gr.Textbox(
                    label="Detection Result",
                    lines=4,
                    interactive=False,
                )
                gr.Markdown("""
**Alert Levels:**
- 🟢 **Safe** — Normal traffic
- 🟡 **Medium** — Suspicious activity
- 🟠 **High** — Likely intrusion
- 🔴 **Critical** — Confirmed attack
""")

        detect_btn.click(
            detect_single,
            inputs=[feature_input, model_choice],
            outputs=result,
        )

        # Convenience buttons that overwrite the feature box with a sample.
        with gr.Row():
            gr.Button("📋 Load Normal Sample").click(
                lambda: ",".join(map(str, SAMPLE_NORMAL)),
                outputs=feature_input,
            )
            gr.Button("⚠️ Load Anomaly Sample").click(
                lambda: ",".join(map(str, SAMPLE_ANOMALY)),
                outputs=feature_input,
            )

    with gr.Tab("📊 Batch Detection"):
        with gr.Row():
            with gr.Column(scale=2):
                batch_input = gr.Textbox(
                    label="Batch Flows (one per line, 41 values each)",
                    value=",".join(map(str, SAMPLE_NORMAL)) + "\n" +
                          ",".join(map(str, SAMPLE_ANOMALY)),
                    lines=8,
                )
                batch_model = gr.Dropdown(
                    choices=_model_choices,
                    value=_default_model,
                    label="Model",
                )
                batch_btn = gr.Button("📊 Batch Detect", variant="primary")
            with gr.Column(scale=1):
                batch_result = gr.Markdown(label="Results")

        batch_btn.click(detect_batch, inputs=[batch_input, batch_model], outputs=batch_result)

    with gr.Tab("📈 Model Performance"):
        gr.Markdown(show_results())

    with gr.Tab("📖 API Documentation"):
        gr.Markdown("""
## REST API Usage

Deploy the FastAPI server locally:
```bash
pip install fastapi uvicorn
uvicorn api:app --host 0.0.0.0 --port 8000
```

### Endpoints

| Endpoint | Method | Description |
|----------|--------|-------------|
| `/health` | GET | Health check |
| `/models` | GET | List available models |
| `/predict` | POST | Single flow detection |
| `/predict/batch` | POST | Batch detection |
| `/stats` | GET | Usage statistics |
| `/sample` | GET | Sample flows |

### Example Request
```bash
curl -X POST http://localhost:8000/predict \\
  -H "Content-Type: application/json" \\
  -d '{
    "features": [0,1,45,0,491,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,2,2,0,0,0,0,1,0,0,0,0.17],
    "model": "XGBoost"
  }'
```

### Example Response
```json
{
  "flow_id": "flow_1",
  "prediction": 1,
  "confidence": 0.9634,
  "model": "XGBoost",
  "latency_ms": 2.77,
  "alert_level": "critical",
  "timestamp": 1778206436.82
}
```
""")

    # NOTE(review): the "Project" link text names a GitHub repository but
    # the target is the Hugging Face model page; confirm the intended URL.
    gr.Markdown("""
---
**Project:** [github.com/Alaudeen/nids](https://huggingface.co/Alaudeen/nids-models) |
**Dataset:** [Mireu-Lab/NSL-KDD](https://huggingface.co/datasets/Mireu-Lab/NSL-KDD) |
**License:** MIT
""")

if __name__ == "__main__":
    demo.launch()