{ "version": "1.0.0", "dataset": "xpertsystems/cyb002-sample", "task": "10-class kill_chain_phase classification", "baselines": { "always_predict_majority_accuracy": 0.19421487603305784, "majority_class": "dwell_idle", "random_guess_accuracy": 0.1 }, "split": { "strategy": "group_aware (GroupShuffleSplit by campaign_id, nested)", "rationale": "100 campaigns generate ~4,353 events; random row-split would leak campaign-level correlations into the test set. The group-aware split ensures train/val/test campaigns are disjoint.", "campaigns_train": 69, "campaigns_val": 16, "campaigns_test": 15, "events_train": 2822, "events_val": 805, "events_test": 726, "seed": 42 }, "n_features": 90, "label_classes": [ "dwell_idle", "reconnaissance", "initial_access", "execution", "persistence", "privilege_escalation", "lateral_movement", "collection", "exfiltration", "impact" ], "class_distribution_train": { "dwell_idle": 609, "reconnaissance": 439, "initial_access": 346, "execution": 313, "persistence": 275, "privilege_escalation": 254, "lateral_movement": 205, "collection": 165, "exfiltration": 117, "impact": 99 }, "class_distribution_test": { "dwell_idle": 141, "reconnaissance": 112, "initial_access": 106, "persistence": 79, "execution": 74, "privilege_escalation": 68, "lateral_movement": 54, "collection": 40, "exfiltration": 31, "impact": 21 }, "leakage_excluded_features": [ "technique_id (62/63 techniques map 1:1 to a single phase)", "technique_name (1:1 alias of technique_id)", "tactic_category (direct alias of kill_chain_phase)" ], "models": { "xgboost": { "architecture": "Gradient-boosted decision trees, multi:softprob, 10 classes", "framework": "xgboost", "test_metrics": { "model": "xgboost", "accuracy": 0.46831955922865015, "macro_f1": 0.42549880749552066, "weighted_f1": 0.440668872633435, "per_class_f1": { "dwell_idle": 0.040268456375838924, "reconnaissance": 0.7532467532467533, "initial_access": 0.6467661691542289, "execution": 0.4406779661016949, "persistence": 0.41304347826086957, "privilege_escalation": 0.5, "lateral_movement": 0.7422680412371134, "collection": 0.22018348623853212, "exfiltration": 0.2727272727272727, "impact": 0.22580645161290322 }, "confusion_matrix": { "labels": [ "dwell_idle", "reconnaissance", "initial_access", "execution", "persistence", "privilege_escalation", "lateral_movement", "collection", "exfiltration", "impact" ], "matrix": [ [ 3, 23, 23, 18, 21, 18, 2, 17, 9, 7 ], [ 2, 87, 2, 21, 0, 0, 0, 0, 0, 0 ], [ 1, 5, 65, 5, 3, 26, 1, 0, 0, 0 ], [ 2, 4, 1, 39, 24, 3, 1, 0, 0, 0 ], [ 0, 0, 1, 12, 38, 9, 0, 18, 1, 0 ], [ 0, 0, 3, 8, 4, 44, 3, 5, 1, 0 ], [ 0, 0, 0, 0, 6, 6, 36, 2, 0, 4 ], [ 0, 0, 0, 0, 2, 1, 0, 12, 15, 10 ], [ 0, 0, 0, 0, 5, 0, 0, 4, 9, 13 ], [ 0, 0, 0, 0, 2, 1, 0, 11, 0, 7 ] ] }, "macro_roc_auc_ovr": 0.8598653258869782 } }, "mlp": { "architecture": "PyTorch MLP, 90 -> 128 -> 64 -> 10, BatchNorm1d + ReLU + Dropout, weighted cross-entropy loss", "framework": "pytorch", "test_metrics": { "model": "mlp", "accuracy": 0.44490358126721763, "macro_f1": 0.3911186394257205, "weighted_f1": 0.4172764238320775, "per_class_f1": { "dwell_idle": 0.013422818791946308, "reconnaissance": 0.7250996015936255, "initial_access": 0.6484018264840182, "execution": 0.5100671140939598, "persistence": 0.30120481927710846, "privilege_escalation": 0.4880952380952381, "lateral_movement": 0.782608695652174, "collection": 0.19130434782608696, "exfiltration": 0.11940298507462686, "impact": 0.13157894736842105 }, "confusion_matrix": { "labels": [ "dwell_idle", "reconnaissance", "initial_access", "execution", "persistence", "privilege_escalation", "lateral_movement", "collection", "exfiltration", "impact" ], "matrix": [ [ 1, 26, 27, 11, 20, 18, 1, 20, 10, 7 ], [ 0, 91, 4, 10, 7, 0, 0, 0, 0, 0 ], [ 1, 4, 71, 1, 5, 21, 0, 3, 0, 0 ], [ 1, 10, 3, 38, 17, 3, 0, 2, 0, 0 ], [ 4, 8, 2, 8, 25, 9, 0, 11, 5, 7 ], [ 0, 0, 6, 7, 4, 41, 1, 7, 2, 0 ], [ 0, 0, 0, 0, 0, 7, 36, 3, 4, 4 ], [ 1, 0, 0, 0, 1, 1, 0, 11, 11, 15 ], [ 0, 0, 0, 0, 5, 0, 0, 5, 4, 17 ], [ 0, 0, 0, 0, 3, 0, 0, 13, 0, 5 ] ] }, "macro_roc_auc_ovr": 0.8496117986303245 } } } }