{
  "version": "1.0.0",
  "dataset": "xpertsystems/cyb008-sample",
  "task": "5-class resolution_outcome classification (SOC alert triage)",
  "baselines": {
    "always_predict_majority_accuracy": 0.32608695652173914,
    "majority_class": "false_positive_closed",
    "random_guess_accuracy": 0.2
  },
  "split": {
    "strategy": "stratified (StratifiedShuffleSplit, nested 70/15/15)",
    "rationale": "CYB008 has no natural row-level group key. Each candidate is unsuitable: there are only 25 analysts (a group-aware split would yield ~4 test analysts) and only 5 SOCs (a group-aware split would yield 1 test SOC), and although there are 589 incidents, only 9% of alerts have a non-null incident_id. Alerts are essentially independent given features, so a stratified random split is the right choice (same approach as CYB001 for network flow classification).",
    "alerts_train": 6440,
    "alerts_val": 1380,
    "alerts_test": 1380,
    "seed": 42
  },
  "n_features": 53,
  "label_classes": [
    "auto_resolved_soar",
    "duplicate_merged",
    "false_positive_closed",
    "true_positive_remediated",
    "true_positive_escalated"
  ],
  "class_distribution_train": {
    "false_positive_closed": 2097,
    "auto_resolved_soar": 1849,
    "true_positive_remediated": 1294,
    "true_positive_escalated": 923,
    "duplicate_merged": 277
  },
  "class_distribution_test": {
    "false_positive_closed": 450,
    "auto_resolved_soar": 396,
    "true_positive_remediated": 277,
    "true_positive_escalated": 198,
    "duplicate_merged": 59
  },
  "oracle_excluded_features": [
    "alert_lifecycle_phase (deterministically maps to 3 of 5 outcomes)",
    "automation_resolved (1:1 with auto_resolved_soar)",
    "escalation_flag (near 1:1 with true_positive_escalated)"
  ],
  "high_cardinality_excluded_features": [
    "mitre_technique_id (36 unique values; perfect oracle for mitre_tactic but unrelated to this target)",
    "detection_rule_id (656 unique values; one-hot explosion)"
  ],
  "leakage_audit_note": "See leakage_diagnostic.json for the full audit of structural oracles and the separate unlearnable-target finding for mitre_tactic. Both models (xgboost and mlp) are trained with all three oracle columns excluded; experiments on the full feature set (oracle columns included) showed 100% test accuracy, confirming the structural leakage.",
  "models": {
    "xgboost": {
      "architecture": "Gradient-boosted decision trees, multi:softprob, 5 classes",
      "framework": "xgboost",
      "test_metrics": {
        "model": "xgboost",
        "accuracy": 0.7659420289855072,
        "macro_f1": 0.7429876131468711,
        "weighted_f1": 0.7669168766123218,
        "per_class_f1": {
          "auto_resolved_soar": 0.7572383073496659,
          "duplicate_merged": 0.7936507936507936,
          "false_positive_closed": 0.9038461538461539,
          "true_positive_remediated": 0.7012987012987013,
          "true_positive_escalated": 0.5589041095890411
        },
        "confusion_matrix": {
          "labels": [
            "auto_resolved_soar",
            "duplicate_merged",
            "false_positive_closed",
            "true_positive_remediated",
            "true_positive_escalated"
          ],
          "matrix": [
            [
              340,
              17,
              6,
              16,
              17
            ],
            [
              9,
              50,
              0,
              0,
              0
            ],
            [
              74,
              0,
              376,
              0,
              0
            ],
            [
              40,
              0,
              0,
              189,
              48
            ],
            [
              39,
              0,
              0,
              57,
              102
            ]
          ]
        },
        "macro_roc_auc_ovr": 0.9522005654044479
      }
    },
    "mlp": {
      "architecture": "PyTorch MLP, 53 -> 128 -> 64 -> 5, BatchNorm1d + ReLU + Dropout, weighted cross-entropy loss",
      "framework": "pytorch",
      "test_metrics": {
        "model": "mlp",
        "accuracy": 0.7673913043478261,
        "macro_f1": 0.7510024599009764,
        "weighted_f1": 0.769556192579193,
        "per_class_f1": {
          "auto_resolved_soar": 0.7505773672055427,
          "duplicate_merged": 0.8251748251748252,
          "false_positive_closed": 0.910411622276029,
          "true_positive_remediated": 0.6981818181818182,
          "true_positive_escalated": 0.5706666666666667
        },
        "confusion_matrix": {
          "labels": [
            "auto_resolved_soar",
            "duplicate_merged",
            "false_positive_closed",
            "true_positive_remediated",
            "true_positive_escalated"
          ],
          "matrix": [
            [
              325,
              25,
              0,
              23,
              23
            ],
            [
              0,
              59,
              0,
              0,
              0
            ],
            [
              74,
              0,
              376,
              0,
              0
            ],
            [
              38,
              0,
              0,
              192,
              47
            ],
            [
              33,
              0,
              0,
              58,
              107
            ]
          ]
        },
        "macro_roc_auc_ovr": 0.9552409409036638
      }
    }
  }
}