{ "version": "1.0.0", "dataset": "xpertsystems/cyb007-sample", "task": "3-class actor_threat_type classification", "baselines": { "always_predict_majority_accuracy": 0.4266666666666667, "majority_class": "negligent_user", "random_guess_accuracy": 0.3333333333333333 }, "split": { "strategy": "group_aware (GroupShuffleSplit by incident_id, nested)", "rationale": "500 insider threat incidents generate 32,500 timesteps (65 per incident). Random row-split would leak per-incident correlations into the test fold. Group-aware split keeps train/val/test incidents disjoint.", "incidents_train": 350, "incidents_val": 75, "incidents_test": 75, "timesteps_train": 22750, "timesteps_val": 4875, "timesteps_test": 4875, "seed": 42 }, "n_features": 28, "label_classes": [ "negligent_user", "malicious_employee", "privileged_insider" ], "class_distribution_train": { "negligent_user": 11895, "malicious_employee": 6370, "privileged_insider": 4485 }, "class_distribution_test": { "negligent_user": 2080, "malicious_employee": 1755, "privileged_insider": 1040 }, "leakage_excluded_features": [], "leakage_audit_notes": "Two features were audited as potential tier oracles: data_access_volume_mb (privileged 0-2541 MB, malicious 0-328, negligent 0-88; overlap [0, 88] covers most timesteps with median ~9 MB each) and exfiltration_volume_mb_cumulative (similar shape). Both have substantial distributional overlap across tiers and represent legitimate observables. Removing both features drops accuracy from 0.85 to 0.47 (below majority), confirming they are real signal rather than oracle leakage. detection_outcome is a near-oracle for INCIDENT_PHASE (purity 0.79, max 1.00 for reconnaissance) but has uniform purity vs tier (~0.50) and is kept as a feature for tier prediction. \nNo features dropped.", "models": { "xgboost": { "architecture": "Gradient-boosted decision trees, multi:softprob, 3 classes", "framework": "xgboost", "test_metrics": { "model": "xgboost", "accuracy": 0.8529230769230769, "macro_f1": 0.8495931102241494, "weighted_f1": 0.8518585237469937, "per_class_f1": { "negligent_user": 0.8762557077625571, "malicious_employee": 0.8262571514604035, "privileged_insider": 0.8462664714494875 }, "confusion_matrix": { "labels": [ "negligent_user", "malicious_employee", "privileged_insider" ], "matrix": [ [ 1919, 111, 50 ], [ 291, 1372, 92 ], [ 90, 83, 867 ] ] }, "macro_roc_auc_ovr": 0.9627526877302969 } }, "mlp": { "architecture": "PyTorch MLP, 28 -> 128 -> 64 -> 3, BatchNorm1d + ReLU + Dropout, weighted cross-entropy loss", "framework": "pytorch", "test_metrics": { "model": "mlp", "accuracy": 0.8685128205128205, "macro_f1": 0.8636019696274673, "weighted_f1": 0.866725739844854, "per_class_f1": { "negligent_user": 0.8934753661784287, "malicious_employee": 0.8414481897627965, "privileged_insider": 0.8558823529411764 }, "confusion_matrix": { "labels": [ "negligent_user", "malicious_employee", "privileged_insider" ], "matrix": [ [ 2013, 22, 45 ], [ 325, 1348, 82 ], [ 88, 79, 873 ] ] }, "macro_roc_auc_ovr": 0.9660800234091633 } } } }