{
  "version": "1.0.0",
  "dataset": "xpertsystems/cyb010-sample",
  "task": "5-class attack_lifecycle_phase classification",
  "baselines": {
    "always_predict_majority_accuracy": 0.5593129361245304,
    "majority_class": "benign_background",
    "random_guess_accuracy": 0.2
  },
  "split": {
    "strategy": "group-aware (GroupShuffleSplit on incident_id, nested 70/15/15)",
    "rationale": "500 incidents x ~44 events each. Events from the same incident share host, threat actor, and phase trajectory. Group-aware splitting prevents train/test leakage. ~75 test incidents per fold.",
    "events_train": 14697,
    "events_val": 3473,
    "events_test": 3726,
    "n_incidents_train": 350,
    "seed": 42
  },
  "n_features": 87,
  "label_classes": [
    "benign_background",
    "initial_access",
    "lateral_movement",
    "persistence_establishment",
    "exfiltration_or_impact"
  ],
  "class_distribution_train": {
    "benign_background": 8547,
    "exfiltration_or_impact": 3898,
    "initial_access": 1187,
    "lateral_movement": 670,
    "persistence_establishment": 395
  },
  "class_distribution_test": {
    "benign_background": 2084,
    "exfiltration_or_impact": 1186,
    "initial_access": 247,
    "lateral_movement": 118,
    "persistence_establishment": 91
  },
  "oracle_excluded_features": [
    "mitre_tactic (benign value -> benign_background phase, perfect oracle)",
    "mitre_technique_id (ATT&CK-by-design perfect oracle for mitre_tactic)",
    "label_malicious (False -> benign_background, perfect oracle)",
    "threat_actor_id (NONE -> benign, perfect oracle)",
    "threat_actor_profile (benign_user -> benign_background, perfect oracle)",
    "event_type (many values phase-specific; e.g. c2_beacon_outbound -> 100% exfil)"
  ],
  "leakage_audit_note": "See leakage_diagnostic.json for the full audit. 11 oracle paths documented (4 phase oracles, 1 ATT&CK indirect, 6 event_type near-oracles, 7 alert-task oracles), and 2 unlearnable README-suggested targets after honest leakage removal.",
  "models": {
    "xgboost": {
      "architecture": "Gradient-boosted decision trees, multi:softprob, 5 classes",
      "framework": "xgboost",
      "test_metrics": {
        "model": "xgboost",
        "accuracy": 0.9492753623188406,
        "macro_f1": 0.7780594102481514,
        "weighted_f1": 0.9522470071864876,
        "per_class_f1": {
          "benign_background": 0.9975996159385502,
          "initial_access": 0.7196652719665272,
          "lateral_movement": 0.48322147651006714,
          "persistence_establishment": 0.703030303030303,
          "exfiltration_or_impact": 0.9867803837953092
        },
        "confusion_matrix": {
          "labels": [
            "benign_background",
            "initial_access",
            "lateral_movement",
            "persistence_establishment",
            "exfiltration_or_impact"
          ],
          "matrix": [
            [
              2078,
              6,
              0,
              0,
              0
            ],
            [
              4,
              172,
              65,
              6,
              0
            ],
            [
              0,
              38,
              72,
              6,
              2
            ],
            [
              0,
              11,
              22,
              58,
              0
            ],
            [
              0,
              4,
              21,
              4,
              1157
            ]
          ]
        },
        "macro_roc_auc_ovr": 0.9904125505537232
      }
    },
    "mlp": {
      "architecture": "PyTorch MLP, 87 -> 128 -> 64 -> 5, BatchNorm1d + ReLU + Dropout, weighted cross-entropy loss",
      "framework": "pytorch",
      "test_metrics": {
        "model": "mlp",
        "accuracy": 0.9412238325281803,
        "macro_f1": 0.7533989932595785,
        "weighted_f1": 0.9423850278932477,
        "per_class_f1": {
          "benign_background": 0.9937679769894535,
          "initial_access": 0.6511627906976745,
          "lateral_movement": 0.4507042253521127,
          "persistence_establishment": 0.6903553299492385,
          "exfiltration_or_impact": 0.9810046433094133
        },
        "confusion_matrix": {
          "labels": [
            "benign_background",
            "initial_access",
            "lateral_movement",
            "persistence_establishment",
            "exfiltration_or_impact"
          ],
          "matrix": [
            [
              2073,
              11,
              0,
              0,
              0
            ],
            [
              10,
              140,
              72,
              17,
              8
            ],
            [
              2,
              27,
              64,
              12,
              13
            ],
            [
              2,
              4,
              17,
              68,
              0
            ],
            [
              1,
              1,
              13,
              9,
              1162
            ]
          ]
        },
        "macro_roc_auc_ovr": 0.986126094475466
      }
    }
  }
}