cyb008-baseline-classifier / validation_results.json

Initial release: XGBoost + MLP for SOC alert triage outcome classification, with structural-leakage and unlearnable-target diagnostic

001717c verified 2 days ago

raw

history blame contribute delete

5.35 kB

	{
	"version": "1.0.0",
	"dataset": "xpertsystems/cyb008-sample",
	"task": "5-class resolution_outcome classification (SOC alert triage)",
	"baselines": {
	"always_predict_majority_accuracy": 0.32608695652173914,
	"majority_class": "false_positive_closed",
	"random_guess_accuracy": 0.2
	},
	"split": {
	"strategy": "stratified (StratifiedShuffleSplit, nested 70/15/15)",
	"rationale": "CYB008 has no natural row-level group key: there are only 25 analysts (a group-aware split would yield ~4 test analysts) and 5 SOCs (a group-aware split would yield 1 test SOC), and although there are 589 incidents, only 9% of alerts have a non-null incident_id. Alerts are essentially independent given their features, so a stratified random split is the right choice (the same approach as CYB001 for network flow classification).",
	"alerts_train": 6440,
	"alerts_val": 1380,
	"alerts_test": 1380,
	"seed": 42
	},
	"n_features": 53,
	"label_classes": [
	"auto_resolved_soar",
	"duplicate_merged",
	"false_positive_closed",
	"true_positive_remediated",
	"true_positive_escalated"
	],
	"class_distribution_train": {
	"false_positive_closed": 2097,
	"auto_resolved_soar": 1849,
	"true_positive_remediated": 1294,
	"true_positive_escalated": 923,
	"duplicate_merged": 277
	},
	"class_distribution_test": {
	"false_positive_closed": 450,
	"auto_resolved_soar": 396,
	"true_positive_remediated": 277,
	"true_positive_escalated": 198,
	"duplicate_merged": 59
	},
	"oracle_excluded_features": [
	"alert_lifecycle_phase (deterministically maps to 3 of 5 outcomes)",
	"automation_resolved (1:1 with auto_resolved_soar)",
	"escalation_flag (near 1:1 with true_positive_escalated)"
	],
	"high_cardinality_excluded_features": [
	"mitre_technique_id (36 unique values; perfect oracle for mitre_tactic but unrelated to this target)",
	"detection_rule_id (656 unique values; one-hot explosion)"
	],
	"leakage_audit_note": "See leakage_diagnostic.json for the full audit of structural oracles and the separate unlearnable-target finding for mitre_tactic. Both models are trained with all three oracle columns excluded; full-feature experiments showed 100% test accuracy, confirming the structural leakage.",
	"models": {
	"xgboost": {
	"architecture": "Gradient-boosted decision trees, multi:softprob, 5 classes",
	"framework": "xgboost",
	"test_metrics": {
	"model": "xgboost",
	"accuracy": 0.7659420289855072,
	"macro_f1": 0.7429876131468711,
	"weighted_f1": 0.7669168766123218,
	"per_class_f1": {
	"auto_resolved_soar": 0.7572383073496659,
	"duplicate_merged": 0.7936507936507936,
	"false_positive_closed": 0.9038461538461539,
	"true_positive_remediated": 0.7012987012987013,
	"true_positive_escalated": 0.5589041095890411
	},
	"confusion_matrix": {
	"labels": [
	"auto_resolved_soar",
	"duplicate_merged",
	"false_positive_closed",
	"true_positive_remediated",
	"true_positive_escalated"
	],
	"matrix": [
	[
	340,
	17,
	6,
	16,
	17
	],
	[
	9,
	50,
	0,
	0,
	0
	],
	[
	74,
	0,
	376,
	0,
	0
	],
	[
	40,
	0,
	0,
	189,
	48
	],
	[
	39,
	0,
	0,
	57,
	102
	]
	]
	},
	"macro_roc_auc_ovr": 0.9522005654044479
	}
	},
	"mlp": {
	"architecture": "PyTorch MLP, 53 -> 128 -> 64 -> 5, BatchNorm1d + ReLU + Dropout, weighted cross-entropy loss",
	"framework": "pytorch",
	"test_metrics": {
	"model": "mlp",
	"accuracy": 0.7673913043478261,
	"macro_f1": 0.7510024599009764,
	"weighted_f1": 0.769556192579193,
	"per_class_f1": {
	"auto_resolved_soar": 0.7505773672055427,
	"duplicate_merged": 0.8251748251748252,
	"false_positive_closed": 0.910411622276029,
	"true_positive_remediated": 0.6981818181818182,
	"true_positive_escalated": 0.5706666666666667
	},
	"confusion_matrix": {
	"labels": [
	"auto_resolved_soar",
	"duplicate_merged",
	"false_positive_closed",
	"true_positive_remediated",
	"true_positive_escalated"
	],
	"matrix": [
	[
	325,
	25,
	0,
	23,
	23
	],
	[
	0,
	59,
	0,
	0,
	0
	],
	[
	74,
	0,
	376,
	0,
	0
	],
	[
	38,
	0,
	0,
	192,
	47
	],
	[
	33,
	0,
	0,
	58,
	107
	]
	]
	},
	"macro_roc_auc_ovr": 0.9552409409036638
	}
	}
	}
	}