{
  "version": "1.0.0",
  "dataset": "xpertsystems/cyb005-sample",
  "task": "4-class actor_capability_tier classification",
  "baselines": {
    "always_predict_majority_accuracy": 0.41348034856837984,
    "majority_class": "organised_syndicate",
    "random_guess_accuracy": 0.25
  },
  "split": {
    "strategy": "group_aware (GroupShuffleSplit by campaign_id, nested)",
    "rationale": "500 ransomware campaigns generate 37,489 timesteps (~75 per campaign). A random row-level split would leak per-campaign correlations into the test fold. A group-aware split keeps train/val/test campaigns disjoint.",
    "campaigns_train": 350,
    "campaigns_val": 75,
    "campaigns_test": 75,
    "timesteps_train": 26242,
    "timesteps_val": 5624,
    "timesteps_test": 5623,
    "seed": 42
  },
  "n_features": 63,
  "label_classes": [
    "lone_actor",
    "organised_syndicate",
    "raas_affiliate",
    "nation_state_nexus"
  ],
  "class_distribution_train": {
    "organised_syndicate": 10423,
    "raas_affiliate": 7950,
    "lone_actor": 4125,
    "nation_state_nexus": 3744
  },
  "class_distribution_test": {
    "organised_syndicate": 2325,
    "raas_affiliate": 1725,
    "nation_state_nexus": 823,
    "lone_actor": 750
  },
  "leakage_excluded_features": [],
  "leakage_audit_notes": "Three columns were audited as potential tier oracles: attribution_risk_score (mean 0.016-0.026 with overlapping ranges - not an oracle, kept); living_off_land_score (mean 0.05-0.20 with large overlap - real observable, kept); attack_phase (no oracle relationship to tier - kept). Separately, detection_outcome contains a recovery_in_progress value that is 1:1 with the attack_phase of the same name, but this is a phase-prediction leak, not a tier-prediction one. No features dropped for this task.",
  "models": {
    "xgboost": {
      "architecture": "Gradient-boosted decision trees, multi:softprob, 4 classes",
      "framework": "xgboost",
      "test_metrics": {
        "model": "xgboost",
        "accuracy": 0.6898452783211808,
        "macro_f1": 0.6751447018282526,
        "weighted_f1": 0.6881356546405818,
        "per_class_f1": {
          "lone_actor": 0.6297297297297297,
          "organised_syndicate": 0.7391393864525427,
          "raas_affiliate": 0.6458906202260922,
          "nation_state_nexus": 0.6858190709046454
        },
        "confusion_matrix": {
          "labels": [
            "lone_actor",
            "organised_syndicate",
            "raas_affiliate",
            "nation_state_nexus"
          ],
          "matrix": [
            [
              466,
              67,
              216,
              1
            ],
            [
              83,
              1795,
              275,
              172
            ],
            [
              156,
              433,
              1057,
              79
            ],
            [
              25,
              237,
              0,
              561
            ]
          ]
        },
        "macro_roc_auc_ovr": 0.873606865711172
      }
    },
    "mlp": {
      "architecture": "PyTorch MLP, 63 -> 128 -> 64 -> 4, BatchNorm1d + ReLU + Dropout, weighted cross-entropy loss",
      "framework": "pytorch",
      "test_metrics": {
        "model": "mlp",
        "accuracy": 0.5118264271741063,
        "macro_f1": 0.512148917800585,
        "weighted_f1": 0.5133102239521222,
        "per_class_f1": {
          "lone_actor": 0.427515633882888,
          "organised_syndicate": 0.5204107187578262,
          "raas_affiliate": 0.49878147847278637,
          "nation_state_nexus": 0.6018878400888396
        },
        "confusion_matrix": {
          "labels": [
            "lone_actor",
            "organised_syndicate",
            "raas_affiliate",
            "nation_state_nexus"
          ],
          "matrix": [
            [
              376,
              17,
              280,
              77
            ],
            [
              282,
              1039,
              745,
              259
            ],
            [
              248,
              456,
              921,
              100
            ],
            [
              103,
              156,
              22,
              542
            ]
          ]
        },
        "macro_roc_auc_ovr": 0.8071564672462985
      }
    }
  }
}