{ "version": "1.0.0", "dataset": "xpertsystems/cyb004-sample", "task": "7-class campaign_phase classification", "baselines": { "always_predict_majority_accuracy": 0.24444444444444444, "majority_class": "email_delivery", "random_guess_accuracy": 0.14285714285714285 }, "split": { "strategy": "group_aware (GroupShuffleSplit by campaign_id, nested)", "rationale": "100 phishing campaigns generate 3,952 timesteps (~40 per campaign). A random row-level split would leak per-campaign correlations into the test fold. A group-aware split keeps train/val/test campaigns disjoint.", "campaigns_train": 69, "campaigns_val": 16, "campaigns_test": 15, "timesteps_train": 2769, "timesteps_val": 598, "timesteps_test": 585, "seed": 42 }, "n_features": 53, "label_classes": [ "target_reconnaissance", "infrastructure_setup", "lure_crafting", "email_delivery", "victim_engagement", "credential_harvesting", "post_compromise_escalation" ], "class_distribution_train": { "email_delivery": 655, "victim_engagement": 459, "post_compromise_escalation": 388, "target_reconnaissance": 381, "credential_harvesting": 352, "lure_crafting": 300, "infrastructure_setup": 234 }, "class_distribution_test": { "email_delivery": 143, "victim_engagement": 100, "target_reconnaissance": 84, "post_compromise_escalation": 75, "lure_crafting": 67, "credential_harvesting": 63, "infrastructure_setup": 53 }, "leakage_excluded_features": [ "delivery_outcome (purity 0.36 vs phase; no_delivery appears only in early phases - near-oracle)" ], "models": { "xgboost": { "architecture": "Gradient-boosted decision trees, multi:softprob, 7 classes", "framework": "xgboost", "test_metrics": { "model": "xgboost", "accuracy": 0.6547008547008547, "macro_f1": 0.6401276666852063, "weighted_f1": 0.657179533714298, "per_class_f1": { "target_reconnaissance": 0.8875739644970414, "infrastructure_setup": 0.7115384615384616, "lure_crafting": 0.6762589928057554, "email_delivery": 0.7913669064748201, "victim_engagement": 0.46938775510204084, 
"credential_harvesting": 0.34074074074074073, "post_compromise_escalation": 0.6040268456375839 }, "confusion_matrix": { "labels": [ "target_reconnaissance", "infrastructure_setup", "lure_crafting", "email_delivery", "victim_engagement", "credential_harvesting", "post_compromise_escalation" ], "matrix": [ [ 75, 0, 9, 0, 0, 0, 0 ], [ 0, 37, 16, 0, 0, 0, 0 ], [ 10, 10, 47, 0, 0, 0, 0 ], [ 0, 4, 0, 110, 28, 1, 0 ], [ 0, 0, 0, 21, 46, 24, 9 ], [ 0, 0, 0, 4, 16, 23, 20 ], [ 0, 0, 0, 0, 6, 24, 45 ] ] }, "macro_roc_auc_ovr": 0.935584434710217 } }, "mlp": { "architecture": "PyTorch MLP, 53 -> 128 -> 64 -> 7, BatchNorm1d + ReLU + Dropout, weighted cross-entropy loss", "framework": "pytorch", "test_metrics": { "model": "mlp", "accuracy": 0.6427350427350428, "macro_f1": 0.6275373447450349, "weighted_f1": 0.6380162402905546, "per_class_f1": { "target_reconnaissance": 0.8313253012048193, "infrastructure_setup": 0.7017543859649122, "lure_crafting": 0.5606060606060606, "email_delivery": 0.7612456747404844, "victim_engagement": 0.3867403314917127, "credential_harvesting": 0.43410852713178294, "post_compromise_escalation": 0.7169811320754716 }, "confusion_matrix": { "labels": [ "target_reconnaissance", "infrastructure_setup", "lure_crafting", "email_delivery", "victim_engagement", "credential_harvesting", "post_compromise_escalation" ], "matrix": [ [ 69, 1, 14, 0, 0, 0, 0 ], [ 0, 40, 13, 0, 0, 0, 0 ], [ 13, 17, 37, 0, 0, 0, 0 ], [ 0, 3, 1, 110, 23, 6, 0 ], [ 0, 0, 0, 32, 35, 21, 12 ], [ 0, 0, 0, 4, 16, 28, 15 ], [ 0, 0, 0, 0, 7, 11, 57 ] ] }, "macro_roc_auc_ovr": 0.9264812360054401 } } } }