File size: 5,161 Bytes

e6a6835

{
  "purpose": "Document why threat_actor_capability_tier (the README's stated headline use case) was NOT shipped as the primary baseline. Every oracle feature group is independently sufficient for 100% test accuracy on threat-actor binary detection; even with all 6 groups dropped, accuracy stays >97%. This is a structural property of the sample's generator (non-overlapping anomaly distributions between threat and legitimate sessions), not a methodology failure. Real-world identity telemetry has substantial overlap; this sample dataset does not reproduce it.",
  "target": "threat_actor_capability_tier != 'none' (binary)",
  "split": "GroupShuffleSplit by user_id, 70/15/15 nested",
  "non_overlapping_distributions": {
    "velocity_anomaly_score": {
      "actor_range": [
        0.5213,
        0.8181
      ],
      "non_actor_range": [
        0.0,
        0.2469
      ],
      "actor_mean": 0.651,
      "non_actor_mean": 0.053
    },
    "session_timestamp_utc": {
      "actor_range": [
        6417,
        1440062
      ],
      "non_actor_range": [
        1445187,
        18000137
      ],
      "note": "Actor sessions and non-actor sessions occupy disjoint time windows"
    },
    "credential_attempt_count": {
      "actor_range": [
        1,
        59
      ],
      "non_actor_range": [
        1,
        2
      ],
      "actor_mean": 12.9,
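      "non_actor_mean": 1.07,
      "note": "Unlike the other entries in non_overlapping_distributions, these ranges overlap at 1-2 attempts; separation comes from the actor tail (up to 59) and the gap between the means, not from disjoint ranges."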
    },
    "login_outcome": {
      "actor_only_values": [
        "failure_account_locked",
        "account_takeover_confirmed",
        "session_hijacked",
        "success_anomalous"
      ],
      "non_actor_only_values": [
        "success_normal"
      ],
      "note": "success_normal is 4306 non-actor / 0 actor rows; failure_account_locked is 0 non-actor / 186 actor rows."
    }
  },
  "ablation_experiments": [
    {
      "config": "full features (all oracles intact)",
      "n_features": 166,
      "accuracy": 1.0,
      "roc_auc": 1.0
    },
    {
      "config": "cumulative drop through behavioural_oracles",
      "dropped_so_far": [
        "velocity_anomaly_score",
        "credential_attempt_count",
        "session_timestamp_utc"
      ],
      "n_features": 163,
      "accuracy": 0.9991111111111112,
      "roc_auc": 1.0
    },
    {
      "config": "cumulative drop through outcome_oracle",
      "dropped_so_far": [
        "velocity_anomaly_score",
        "credential_attempt_count",
        "session_timestamp_utc",
        "login_outcome"
      ],
      "n_features": 154,
      "accuracy": 0.9982222222222222,
      "roc_auc": 0.9999714285714285
    },
    {
      "config": "cumulative drop through geo_oracle",
      "dropped_so_far": [
        "velocity_anomaly_score",
        "credential_attempt_count",
        "session_timestamp_utc",
        "login_outcome",
        "geo_country_code"
      ],
      "n_features": 138,
      "accuracy": 0.9986666666666667,
      "roc_auc": 0.9999619047619047
    },
    {
      "config": "cumulative drop through device_oracle",
      "dropped_so_far": [
        "velocity_anomaly_score",
        "credential_attempt_count",
        "session_timestamp_utc",
        "login_outcome",
        "geo_country_code",
        "device_trust_level"
      ],
      "n_features": 133,
      "accuracy": 0.9982222222222222,
      "roc_auc": 0.9999047619047619
    },
    {
      "config": "cumulative drop through user_risk_oracle",
      "dropped_so_far": [
        "velocity_anomaly_score",
        "credential_attempt_count",
        "session_timestamp_utc",
        "login_outcome",
        "geo_country_code",
        "device_trust_level",
        "user_risk_tier"
      ],
      "n_features": 130,
      "accuracy": 0.9977777777777778,
      "roc_auc": 0.9996095238095238
    },
    {
      "config": "cumulative drop through anomaly_signal",
      "dropped_so_far": [
        "velocity_anomaly_score",
        "credential_attempt_count",
        "session_timestamp_utc",
        "login_outcome",
        "geo_country_code",
        "device_trust_level",
        "user_risk_tier",
        "geo_anomaly_score"
      ],
      "n_features": 129,
      "accuracy": 0.9706666666666667,
      "roc_auc": 0.9896857142857143
    }
  ],
  "conclusion": "Even with all six oracle feature groups removed (8 columns in dropped_so_far; n_features falls from 166 to 129, i.e. 37 features dropped), the residual feature set still yields 97% test accuracy and AUC 0.99 on threat-actor binary detection. The leakage is not localised \u2014 it is distributed across the entire feature space because the generator produces threat-actor sessions that are anomalous on every dimension simultaneously without overlap. A buyer planning to train a real detection model on this dataset should know that the sample's headline detection task is not a representative ML problem.",
  "recommendation_to_dataset_author": "Increase distributional overlap between threat-actor and legitimate session populations across all anomaly indicators: velocity score, credential attempt count, geo anomaly score, geo country code frequency, device trust level, login outcome class. Also interleave threat-actor and legitimate sessions in time, since session_timestamp_utc currently places the two populations in disjoint time windows. Real-world detection systems operate at AUC 0.7-0.9, not 1.0; the sample should reflect that operating regime."
}