{
  "purpose": "Quantify how much the session-aggregate features contribute to the headline number. Trained with identical architecture on the same split, with session features dropped.",
  "session_features_dropped": [
    "payload_entropy_mean",
    "retransmission_rate",
    "protocol_violation_count",
    "c2_beacon_flag",
    "session_risk_score"
  ],
  "n_features_full": 101,
  "n_features_flow_only": 96,
  "full_model_metrics": {
    "model": "xgboost",
    "accuracy": 0.9979536152796725,
    "macro_f1": 0.9961123729105247,
    "weighted_f1": 0.9979537067605843,
    "per_class_f1": {
      "BENIGN": 0.9985761746559089,
      "MALICIOUS": 0.9983079526226735,
      "AMBIGUOUS": 0.9914529914529915
    },
    "confusion_matrix": {
      "labels": [
        "BENIGN",
        "MALICIOUS",
        "AMBIGUOUS"
      ],
      "matrix": [
        [
          1052,
          1,
          1
        ],
        [
          0,
          295,
          0
        ],
        [
          1,
          0,
          116
        ]
      ]
    },
    "macro_roc_auc_ovr": 0.9999888611978185
  },
  "flow_only_model_metrics": {
    "model": "xgboost_flow_only",
    "accuracy": 0.9884038199181446,
    "macro_f1": 0.9776308066176851,
    "weighted_f1": 0.9883464558152856,
    "per_class_f1": {
      "BENIGN": 0.9933774834437086,
      "MALICIOUS": 0.9829931972789115,
      "AMBIGUOUS": 0.9565217391304348
    },
    "confusion_matrix": {
      "labels": [
        "BENIGN",
        "MALICIOUS",
        "AMBIGUOUS"
      ],
      "matrix": [
        [
          1050,
          2,
          2
        ],
        [
          5,
          289,
          1
        ],
        [
          5,
          2,
          110
        ]
      ]
    },
    "macro_roc_auc_ovr": 0.9988745635051176
  },
  "interpretation": "Removing session aggregates costs roughly 1 percentage point of accuracy. The model is not session-dominated; the flow-level features carry the bulk of the signal."
}