cyb004-baseline-classifier / validation_results.json

Initial release: XGBoost + MLP for phishing campaign-phase classification

16be928 verified 2 days ago

6.06 kB

	{
	"version": "1.0.0",
	"dataset": "xpertsystems/cyb004-sample",
	"task": "7-class campaign_phase classification",
	"baselines": {
	"always_predict_majority_accuracy": 0.24444444444444444,
	"majority_class": "email_delivery",
	"random_guess_accuracy": 0.14285714285714285
	},
	"split": {
	"strategy": "group_aware (GroupShuffleSplit by campaign_id, nested)",
	"rationale": "100 phishing campaigns generate ~3,952 timesteps (~40 per campaign). Random row-split would leak per-campaign correlations into the test fold. Group-aware split keeps train/val/test campaigns disjoint.",
	"campaigns_train": 69,
	"campaigns_val": 16,
	"campaigns_test": 15,
	"timesteps_train": 2769,
	"timesteps_val": 598,
	"timesteps_test": 585,
	"seed": 42
	},
	"n_features": 53,
	"label_classes": [
	"target_reconnaissance",
	"infrastructure_setup",
	"lure_crafting",
	"email_delivery",
	"victim_engagement",
	"credential_harvesting",
	"post_compromise_escalation"
	],
	"class_distribution_train": {
	"email_delivery": 655,
	"victim_engagement": 459,
	"post_compromise_escalation": 388,
	"target_reconnaissance": 381,
	"credential_harvesting": 352,
	"lure_crafting": 300,
	"infrastructure_setup": 234
	},
	"class_distribution_test": {
	"email_delivery": 143,
	"victim_engagement": 100,
	"target_reconnaissance": 84,
	"post_compromise_escalation": 75,
	"lure_crafting": 67,
	"credential_harvesting": 63,
	"infrastructure_setup": 53
	},
	"leakage_excluded_features": [
	"delivery_outcome (purity 0.36 vs phase; no_delivery appears only in early phases - near-oracle)"
	],
	"models": {
	"xgboost": {
	"architecture": "Gradient-boosted decision trees, multi:softprob, 7 classes",
	"framework": "xgboost",
	"test_metrics": {
	"model": "xgboost",
	"accuracy": 0.6547008547008547,
	"macro_f1": 0.6401276666852063,
	"weighted_f1": 0.657179533714298,
	"per_class_f1": {
	"target_reconnaissance": 0.8875739644970414,
	"infrastructure_setup": 0.7115384615384616,
	"lure_crafting": 0.6762589928057554,
	"email_delivery": 0.7913669064748201,
	"victim_engagement": 0.46938775510204084,
	"credential_harvesting": 0.34074074074074073,
	"post_compromise_escalation": 0.6040268456375839
	},
	"confusion_matrix": {
	"labels": [
	"target_reconnaissance",
	"infrastructure_setup",
	"lure_crafting",
	"email_delivery",
	"victim_engagement",
	"credential_harvesting",
	"post_compromise_escalation"
	],
	"matrix": [
	[
	75,
	0,
	9,
	0,
	0,
	0,
	0
	],
	[
	0,
	37,
	16,
	0,
	0,
	0,
	0
	],
	[
	10,
	10,
	47,
	0,
	0,
	0,
	0
	],
	[
	0,
	4,
	0,
	110,
	28,
	1,
	0
	],
	[
	0,
	0,
	0,
	21,
	46,
	24,
	9
	],
	[
	0,
	0,
	0,
	4,
	16,
	23,
	20
	],
	[
	0,
	0,
	0,
	0,
	6,
	24,
	45
	]
	]
	},
	"macro_roc_auc_ovr": 0.935584434710217
	}
	},
	"mlp": {
	"architecture": "PyTorch MLP, 53 -> 128 -> 64 -> 7, BatchNorm1d + ReLU + Dropout, weighted cross-entropy loss",
	"framework": "pytorch",
	"test_metrics": {
	"model": "mlp",
	"accuracy": 0.6427350427350428,
	"macro_f1": 0.6275373447450349,
	"weighted_f1": 0.6380162402905546,
	"per_class_f1": {
	"target_reconnaissance": 0.8313253012048193,
	"infrastructure_setup": 0.7017543859649122,
	"lure_crafting": 0.5606060606060606,
	"email_delivery": 0.7612456747404844,
	"victim_engagement": 0.3867403314917127,
	"credential_harvesting": 0.43410852713178294,
	"post_compromise_escalation": 0.7169811320754716
	},
	"confusion_matrix": {
	"labels": [
	"target_reconnaissance",
	"infrastructure_setup",
	"lure_crafting",
	"email_delivery",
	"victim_engagement",
	"credential_harvesting",
	"post_compromise_escalation"
	],
	"matrix": [
	[
	69,
	1,
	14,
	0,
	0,
	0,
	0
	],
	[
	0,
	40,
	13,
	0,
	0,
	0,
	0
	],
	[
	13,
	17,
	37,
	0,
	0,
	0,
	0
	],
	[
	0,
	3,
	1,
	110,
	23,
	6,
	0
	],
	[
	0,
	0,
	0,
	32,
	35,
	21,
	12
	],
	[
	0,
	0,
	0,
	4,
	16,
	28,
	15
	],
	[
	0,
	0,
	0,
	0,
	7,
	11,
	57
	]
	]
	},
	"macro_roc_auc_ovr": 0.9264812360054401
	}
	}
	}
	}