cyb008-baseline-classifier / validation_results.json
pradeep-xpert's picture
Initial release: XGBoost + MLP for SOC alert triage outcome classification, with structural-leakage and unlearnable-target diagnostic
001717c verified
{
"version": "1.0.0",
"dataset": "xpertsystems/cyb008-sample",
"task": "5-class resolution_outcome classification (SOC alert triage)",
"baselines": {
"always_predict_majority_accuracy": 0.32608695652173914,
"majority_class": "false_positive_closed",
"random_guess_accuracy": 0.2
},
"split": {
"strategy": "stratified (StratifiedShuffleSplit, nested 70/15/15)",
"rationale": "CYB008 has no natural row-level group key: 25 analysts (group-aware split would yield ~4 test analysts), 5 SOCs (would yield 1 test SOC), 589 incidents but only 9% of alerts have a non-null incident_id. Alerts are essentially independent given features, so stratified random split is the right choice (same approach as CYB001 for network flow classification).",
"alerts_train": 6440,
"alerts_val": 1380,
"alerts_test": 1380,
"seed": 42
},
"n_features": 53,
"label_classes": [
"auto_resolved_soar",
"duplicate_merged",
"false_positive_closed",
"true_positive_remediated",
"true_positive_escalated"
],
"class_distribution_train": {
"false_positive_closed": 2097,
"auto_resolved_soar": 1849,
"true_positive_remediated": 1294,
"true_positive_escalated": 923,
"duplicate_merged": 277
},
"class_distribution_test": {
"false_positive_closed": 450,
"auto_resolved_soar": 396,
"true_positive_remediated": 277,
"true_positive_escalated": 198,
"duplicate_merged": 59
},
"oracle_excluded_features": [
"alert_lifecycle_phase (deterministically maps to 3 of 5 outcomes)",
"automation_resolved (1:1 with auto_resolved_soar)",
"escalation_flag (near 1:1 with true_positive_escalated)"
],
"high_cardinality_excluded_features": [
"mitre_technique_id (36 unique values; perfect oracle for mitre_tactic but unrelated to this target)",
"detection_rule_id (656 unique values; one-hot explosion)"
],
"leakage_audit_note": "See leakage_diagnostic.json for the full audit of structural oracles and the separate unlearnable-target finding for mitre_tactic. The model is trained with all three oracle columns excluded; full-features experiments showed 100% test accuracy, confirming the structural leakage.",
"models": {
"xgboost": {
"architecture": "Gradient-boosted decision trees, multi:softprob, 5 classes",
"framework": "xgboost",
"test_metrics": {
"model": "xgboost",
"accuracy": 0.7659420289855072,
"macro_f1": 0.7429876131468711,
"weighted_f1": 0.7669168766123218,
"per_class_f1": {
"auto_resolved_soar": 0.7572383073496659,
"duplicate_merged": 0.7936507936507936,
"false_positive_closed": 0.9038461538461539,
"true_positive_remediated": 0.7012987012987013,
"true_positive_escalated": 0.5589041095890411
},
"confusion_matrix": {
"labels": [
"auto_resolved_soar",
"duplicate_merged",
"false_positive_closed",
"true_positive_remediated",
"true_positive_escalated"
],
"matrix": [
[
340,
17,
6,
16,
17
],
[
9,
50,
0,
0,
0
],
[
74,
0,
376,
0,
0
],
[
40,
0,
0,
189,
48
],
[
39,
0,
0,
57,
102
]
]
},
"macro_roc_auc_ovr": 0.9522005654044479
}
},
"mlp": {
"architecture": "PyTorch MLP, 53 -> 128 -> 64 -> 5, BatchNorm1d + ReLU + Dropout, weighted cross-entropy loss",
"framework": "pytorch",
"test_metrics": {
"model": "mlp",
"accuracy": 0.7673913043478261,
"macro_f1": 0.7510024599009764,
"weighted_f1": 0.769556192579193,
"per_class_f1": {
"auto_resolved_soar": 0.7505773672055427,
"duplicate_merged": 0.8251748251748252,
"false_positive_closed": 0.910411622276029,
"true_positive_remediated": 0.6981818181818182,
"true_positive_escalated": 0.5706666666666667
},
"confusion_matrix": {
"labels": [
"auto_resolved_soar",
"duplicate_merged",
"false_positive_closed",
"true_positive_remediated",
"true_positive_escalated"
],
"matrix": [
[
325,
25,
0,
23,
23
],
[
0,
59,
0,
0,
0
],
[
74,
0,
376,
0,
0
],
[
38,
0,
0,
192,
47
],
[
33,
0,
0,
58,
107
]
]
},
"macro_roc_auc_ovr": 0.9552409409036638
}
}
}
}