cyb004-baseline-classifier / validation_results.json
pradeep-xpert's picture
Initial release: XGBoost + MLP for phishing campaign-phase classification
16be928 verified
{
"version": "1.0.0",
"dataset": "xpertsystems/cyb004-sample",
"task": "7-class campaign_phase classification",
"baselines": {
"always_predict_majority_accuracy": 0.24444444444444444,
"majority_class": "email_delivery",
"random_guess_accuracy": 0.14285714285714285
},
"split": {
"strategy": "group_aware (GroupShuffleSplit by campaign_id, nested)",
"rationale": "100 phishing campaigns generate ~3,952 timesteps (~40 per campaign). Random row-split would leak per-campaign correlations into the test fold. Group-aware split keeps train/val/test campaigns disjoint.",
"campaigns_train": 69,
"campaigns_val": 16,
"campaigns_test": 15,
"timesteps_train": 2769,
"timesteps_val": 598,
"timesteps_test": 585,
"seed": 42
},
"n_features": 53,
"label_classes": [
"target_reconnaissance",
"infrastructure_setup",
"lure_crafting",
"email_delivery",
"victim_engagement",
"credential_harvesting",
"post_compromise_escalation"
],
"class_distribution_train": {
"email_delivery": 655,
"victim_engagement": 459,
"post_compromise_escalation": 388,
"target_reconnaissance": 381,
"credential_harvesting": 352,
"lure_crafting": 300,
"infrastructure_setup": 234
},
"class_distribution_test": {
"email_delivery": 143,
"victim_engagement": 100,
"target_reconnaissance": 84,
"post_compromise_escalation": 75,
"lure_crafting": 67,
"credential_harvesting": 63,
"infrastructure_setup": 53
},
"leakage_excluded_features": [
"delivery_outcome (purity 0.36 vs phase; no_delivery appears only in early phases - near-oracle)"
],
"models": {
"xgboost": {
"architecture": "Gradient-boosted decision trees, multi:softprob, 7 classes",
"framework": "xgboost",
"test_metrics": {
"model": "xgboost",
"accuracy": 0.6547008547008547,
"macro_f1": 0.6401276666852063,
"weighted_f1": 0.657179533714298,
"per_class_f1": {
"target_reconnaissance": 0.8875739644970414,
"infrastructure_setup": 0.7115384615384616,
"lure_crafting": 0.6762589928057554,
"email_delivery": 0.7913669064748201,
"victim_engagement": 0.46938775510204084,
"credential_harvesting": 0.34074074074074073,
"post_compromise_escalation": 0.6040268456375839
},
"confusion_matrix": {
"labels": [
"target_reconnaissance",
"infrastructure_setup",
"lure_crafting",
"email_delivery",
"victim_engagement",
"credential_harvesting",
"post_compromise_escalation"
],
"matrix": [
[
75,
0,
9,
0,
0,
0,
0
],
[
0,
37,
16,
0,
0,
0,
0
],
[
10,
10,
47,
0,
0,
0,
0
],
[
0,
4,
0,
110,
28,
1,
0
],
[
0,
0,
0,
21,
46,
24,
9
],
[
0,
0,
0,
4,
16,
23,
20
],
[
0,
0,
0,
0,
6,
24,
45
]
]
},
"macro_roc_auc_ovr": 0.935584434710217
}
},
"mlp": {
"architecture": "PyTorch MLP, 53 -> 128 -> 64 -> 7, BatchNorm1d + ReLU + Dropout, weighted cross-entropy loss",
"framework": "pytorch",
"test_metrics": {
"model": "mlp",
"accuracy": 0.6427350427350428,
"macro_f1": 0.6275373447450349,
"weighted_f1": 0.6380162402905546,
"per_class_f1": {
"target_reconnaissance": 0.8313253012048193,
"infrastructure_setup": 0.7017543859649122,
"lure_crafting": 0.5606060606060606,
"email_delivery": 0.7612456747404844,
"victim_engagement": 0.3867403314917127,
"credential_harvesting": 0.43410852713178294,
"post_compromise_escalation": 0.7169811320754716
},
"confusion_matrix": {
"labels": [
"target_reconnaissance",
"infrastructure_setup",
"lure_crafting",
"email_delivery",
"victim_engagement",
"credential_harvesting",
"post_compromise_escalation"
],
"matrix": [
[
69,
1,
14,
0,
0,
0,
0
],
[
0,
40,
13,
0,
0,
0,
0
],
[
13,
17,
37,
0,
0,
0,
0
],
[
0,
3,
1,
110,
23,
6,
0
],
[
0,
0,
0,
32,
35,
21,
12
],
[
0,
0,
0,
4,
16,
28,
15
],
[
0,
0,
0,
0,
7,
11,
57
]
]
},
"macro_roc_auc_ovr": 0.9264812360054401
}
}
}
}