Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -66,45 +66,22 @@ def load_simulated_data(samples=2000):
|
|
| 66 |
return df
|
| 67 |
|
| 68 |
def preprocess_csv(df):
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
required_cic_columns = [
|
| 75 |
-
"Flow Duration",
|
| 76 |
-
"Total Fwd Packets",
|
| 77 |
-
"Total Backward Packets",
|
| 78 |
-
"Packet Length Mean",
|
| 79 |
-
"Label"
|
| 80 |
-
]
|
| 81 |
-
|
| 82 |
-
# Check if CSV is CIC-style
|
| 83 |
-
if not all(col in df.columns for col in required_cic_columns):
|
| 84 |
-
st.error("Uploaded CSV is not compatible with CIC-IDS feature format.")
|
| 85 |
-
st.stop()
|
| 86 |
-
|
| 87 |
-
# Convert labels
|
| 88 |
-
df["Label"] = df["Label"].apply(
|
| 89 |
-
lambda x: 0 if x == "BENIGN" else 1
|
| 90 |
-
)
|
| 91 |
|
| 92 |
-
# Rename CIC columns → internal standard names
|
| 93 |
df = df.rename(columns={
|
| 94 |
-
"Packet Length Mean": "packet_size",
|
| 95 |
"Flow Duration": "duration",
|
| 96 |
"Total Fwd Packets": "src_bytes",
|
| 97 |
"Total Backward Packets": "dst_bytes",
|
|
|
|
| 98 |
"Label": "label"
|
| 99 |
})
|
| 100 |
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
["packet_size", "duration", "src_bytes", "dst_bytes", "label"]
|
| 104 |
-
]
|
| 105 |
-
|
| 106 |
-
return df
|
| 107 |
-
|
| 108 |
|
| 109 |
# ============================
|
| 110 |
# MODEL TRAINING
|
|
|
|
| 66 |
return df
|
| 67 |
|
| 68 |
def preprocess_csv(df):
|
| 69 |
+
df = df.replace([np.inf, -np.inf], np.nan).dropna()
|
| 70 |
+
|
| 71 |
+
# Normalize CIC-like labels
|
| 72 |
+
if "Label" in df.columns:
|
| 73 |
+
df["Label"] = df["Label"].apply(lambda x: 0 if x == "BENIGN" else 1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
|
|
|
|
| 75 |
df = df.rename(columns={
|
|
|
|
| 76 |
"Flow Duration": "duration",
|
| 77 |
"Total Fwd Packets": "src_bytes",
|
| 78 |
"Total Backward Packets": "dst_bytes",
|
| 79 |
+
"Packet Length Mean": "packet_size",
|
| 80 |
"Label": "label"
|
| 81 |
})
|
| 82 |
|
| 83 |
+
required = ["packet_size", "duration", "src_bytes", "dst_bytes", "label"]
|
| 84 |
+
return df[required]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
# ============================
|
| 87 |
# MODEL TRAINING
|