{
  "version": "1.0.0",
  "dataset": "xpertsystems/cyb004-sample",
  "task": "7-class campaign_phase classification",
  "baselines": {
    "always_predict_majority_accuracy": 0.24444444444444444,
    "majority_class": "email_delivery",
    "random_guess_accuracy": 0.14285714285714285
  },
  "split": {
    "strategy": "group_aware (GroupShuffleSplit by campaign_id, nested)",
    "rationale": "100 phishing campaigns generate ~3,952 timesteps (~40 per campaign). Random row-split would leak per-campaign correlations into the test fold. Group-aware split keeps train/val/test campaigns disjoint.",
    "campaigns_train": 69,
    "campaigns_val": 16,
    "campaigns_test": 15,
    "timesteps_train": 2769,
    "timesteps_val": 598,
    "timesteps_test": 585,
    "seed": 42
  },
  "n_features": 53,
  "label_classes": [
    "target_reconnaissance",
    "infrastructure_setup",
    "lure_crafting",
    "email_delivery",
    "victim_engagement",
    "credential_harvesting",
    "post_compromise_escalation"
  ],
  "class_distribution_train": {
    "email_delivery": 655,
    "victim_engagement": 459,
    "post_compromise_escalation": 388,
    "target_reconnaissance": 381,
    "credential_harvesting": 352,
    "lure_crafting": 300,
    "infrastructure_setup": 234
  },
  "class_distribution_test": {
    "email_delivery": 143,
    "victim_engagement": 100,
    "target_reconnaissance": 84,
    "post_compromise_escalation": 75,
    "lure_crafting": 67,
    "credential_harvesting": 63,
    "infrastructure_setup": 53
  },
  "leakage_excluded_features": [
    "delivery_outcome (purity 0.36 vs phase; no_delivery appears only in early phases - near-oracle)"
  ],
  "models": {
    "xgboost": {
      "architecture": "Gradient-boosted decision trees, multi:softprob, 7 classes",
      "framework": "xgboost",
      "test_metrics": {
        "model": "xgboost",
        "accuracy": 0.6547008547008547,
        "macro_f1": 0.6401276666852063,
        "weighted_f1": 0.657179533714298,
        "per_class_f1": {
          "target_reconnaissance": 0.8875739644970414,
          "infrastructure_setup": 0.7115384615384616,
          "lure_crafting": 0.6762589928057554,
          "email_delivery": 0.7913669064748201,
          "victim_engagement": 0.46938775510204084,
          "credential_harvesting": 0.34074074074074073,
          "post_compromise_escalation": 0.6040268456375839
        },
        "confusion_matrix": {
          "labels": [
            "target_reconnaissance",
            "infrastructure_setup",
            "lure_crafting",
            "email_delivery",
            "victim_engagement",
            "credential_harvesting",
            "post_compromise_escalation"
          ],
          "matrix": [
            [
              75,
              0,
              9,
              0,
              0,
              0,
              0
            ],
            [
              0,
              37,
              16,
              0,
              0,
              0,
              0
            ],
            [
              10,
              10,
              47,
              0,
              0,
              0,
              0
            ],
            [
              0,
              4,
              0,
              110,
              28,
              1,
              0
            ],
            [
              0,
              0,
              0,
              21,
              46,
              24,
              9
            ],
            [
              0,
              0,
              0,
              4,
              16,
              23,
              20
            ],
            [
              0,
              0,
              0,
              0,
              6,
              24,
              45
            ]
          ]
        },
        "macro_roc_auc_ovr": 0.935584434710217
      }
    },
    "mlp": {
      "architecture": "PyTorch MLP, 53 -> 128 -> 64 -> 7, BatchNorm1d + ReLU + Dropout, weighted cross-entropy loss",
      "framework": "pytorch",
      "test_metrics": {
        "model": "mlp",
        "accuracy": 0.6427350427350428,
        "macro_f1": 0.6275373447450349,
        "weighted_f1": 0.6380162402905546,
        "per_class_f1": {
          "target_reconnaissance": 0.8313253012048193,
          "infrastructure_setup": 0.7017543859649122,
          "lure_crafting": 0.5606060606060606,
          "email_delivery": 0.7612456747404844,
          "victim_engagement": 0.3867403314917127,
          "credential_harvesting": 0.43410852713178294,
          "post_compromise_escalation": 0.7169811320754716
        },
        "confusion_matrix": {
          "labels": [
            "target_reconnaissance",
            "infrastructure_setup",
            "lure_crafting",
            "email_delivery",
            "victim_engagement",
            "credential_harvesting",
            "post_compromise_escalation"
          ],
          "matrix": [
            [
              69,
              1,
              14,
              0,
              0,
              0,
              0
            ],
            [
              0,
              40,
              13,
              0,
              0,
              0,
              0
            ],
            [
              13,
              17,
              37,
              0,
              0,
              0,
              0
            ],
            [
              0,
              3,
              1,
              110,
              23,
              6,
              0
            ],
            [
              0,
              0,
              0,
              32,
              35,
              21,
              12
            ],
            [
              0,
              0,
              0,
              4,
              16,
              28,
              15
            ],
            [
              0,
              0,
              0,
              0,
              7,
              11,
              57
            ]
          ]
        },
        "macro_roc_auc_ovr": 0.9264812360054401
      }
    }
  }
}