{ "version": "1.0.0", "dataset": "xpertsystems/cyb011-sample", "task": "7-class attack_phase classification", "baselines": { "always_predict_majority_accuracy": 0.5033333333333333, "majority_class": "evasion_attempt", "random_guess_accuracy": 0.14285714285714285 }, "split": { "strategy": "group-aware (GroupShuffleSplit on campaign_id, nested 70/15/15)", "rationale": "200 campaigns x 70 timesteps each. Timesteps from the same campaign share attacker, target segment, and tier - so train/test contamination is a real risk with random splitting. ~30 test campaigns per fold.", "events_train": 9730, "events_val": 2170, "events_test": 2100, "seed": 42 }, "n_features": 37, "label_classes": [ "reconnaissance", "feature_space_probe", "perturbation_craft", "evasion_attempt", "feedback_adaptation", "campaign_consolidation", "idle_dwell" ], "class_distribution_train": { "evasion_attempt": 5082, "idle_dwell": 1677, "feature_space_probe": 983, "campaign_consolidation": 571, "reconnaissance": 558, "perturbation_craft": 511, "feedback_adaptation": 348 }, "class_distribution_test": { "evasion_attempt": 1057, "idle_dwell": 388, "feature_space_probe": 220, "reconnaissance": 128, "campaign_consolidation": 116, "perturbation_craft": 115, "feedback_adaptation": 76 }, "oracle_excluded_features": [ "detection_outcome (perfect oracle for evasion_attempt phase)", "detector_confidence_score (mechanical decoder for detection_outcome)", "evasion_budget_consumed (==0 is perfect oracle for 3 early phases)" ], "timestep_kept_as_legitimate_feature": "timestep is KEPT as a feature. It's a partial oracle for 3 phases (reconnaissance, feedback_adaptation, campaign_consolidation) but is a legitimate campaign-progress observable a defender would have at decision time. Removing it drops accuracy by ~9pp.", "leakage_audit_note": "See leakage_diagnostic.json for the full 6-oracle-path audit, 4 unlearnable README-suggested targets, and the missing nation_state attacker tier note.", "models": { "xgboost": { "architecture": "Gradient-boosted decision trees, multi:softprob, 7 classes", "framework": "xgboost", "test_metrics": { "model": "xgboost", "accuracy": 0.8642857142857143, "macro_f1": 0.7693247628697397, "weighted_f1": 0.8650489644308249, "per_class_f1": { "reconnaissance": 0.8865248226950354, "feature_space_probe": 0.7829977628635347, "perturbation_craft": 0.4927536231884058, "evasion_attempt": 0.9962013295346629, "feedback_adaptation": 0.7151515151515152, "campaign_consolidation": 0.8075471698113208, "idle_dwell": 0.7040971168437026 }, "confusion_matrix": { "labels": [ "reconnaissance", "feature_space_probe", "perturbation_craft", "evasion_attempt", "feedback_adaptation", "campaign_consolidation", "idle_dwell" ], "matrix": [ [ 125, 0, 0, 0, 0, 0, 3 ], [ 0, 175, 43, 0, 0, 0, 2 ], [ 0, 20, 68, 0, 0, 0, 27 ], [ 0, 0, 2, 1049, 0, 0, 6 ], [ 0, 0, 0, 0, 59, 16, 1 ], [ 0, 0, 0, 0, 9, 107, 0 ], [ 29, 32, 48, 0, 21, 26, 232 ] ] }, "macro_roc_auc_ovr": 0.9752868672798508 } }, "mlp": { "architecture": "PyTorch MLP, 37 -> 128 -> 64 -> 7, BatchNorm1d + ReLU + Dropout, weighted cross-entropy loss", "framework": "pytorch", "test_metrics": { "model": "mlp", "accuracy": 0.8385714285714285, "macro_f1": 0.7344635260259678, "weighted_f1": 0.8387834443096441, "per_class_f1": { "reconnaissance": 0.8737201365187713, "feature_space_probe": 0.746606334841629, "perturbation_craft": 0.49707602339181284, "evasion_attempt": 0.9928537398761315, "feedback_adaptation": 0.627906976744186, "campaign_consolidation": 0.784452296819788, "idle_dwell": 0.6186291739894552 }, "confusion_matrix": { "labels": [ "reconnaissance", "feature_space_probe", "perturbation_craft", "evasion_attempt", "feedback_adaptation", "campaign_consolidation", "idle_dwell" ], "matrix": [ [ 
128, 0, 0, 0, 0, 0, 0 ], [ 0, 165, 55, 0, 0, 0, 0 ], [ 5, 24, 85, 0, 0, 0, 1 ], [ 0, 4, 2, 1042, 4, 1, 4 ], [ 0, 0, 0, 0, 54, 22, 0 ], [ 0, 0, 0, 0, 5, 111, 0 ], [ 32, 29, 85, 0, 33, 33, 176 ] ] }, "macro_roc_auc_ovr": 0.9705026035482472 } } } }