{ "purpose": "Quantify how much the session-aggregate features contribute to the headline number. The flow-only model is trained with the identical architecture on the same split as the full model, with the session features dropped.", "session_features_dropped": [ "payload_entropy_mean", "retransmission_rate", "protocol_violation_count", "c2_beacon_flag", "session_risk_score" ], "n_features_full": 101, "n_features_flow_only": 96, "full_model_metrics": { "model": "xgboost", "accuracy": 0.9979536152796725, "macro_f1": 0.9961123729105247, "weighted_f1": 0.9979537067605843, "per_class_f1": { "BENIGN": 0.9985761746559089, "MALICIOUS": 0.9983079526226735, "AMBIGUOUS": 0.9914529914529915 }, "confusion_matrix": { "labels": [ "BENIGN", "MALICIOUS", "AMBIGUOUS" ], "matrix": [ [ 1052, 1, 1 ], [ 0, 295, 0 ], [ 1, 0, 116 ] ] }, "macro_roc_auc_ovr": 0.9999888611978185 }, "flow_only_model_metrics": { "model": "xgboost_flow_only", "accuracy": 0.9884038199181446, "macro_f1": 0.9776308066176851, "weighted_f1": 0.9883464558152856, "per_class_f1": { "BENIGN": 0.9933774834437086, "MALICIOUS": 0.9829931972789115, "AMBIGUOUS": 0.9565217391304348 }, "confusion_matrix": { "labels": [ "BENIGN", "MALICIOUS", "AMBIGUOUS" ], "matrix": [ [ 1050, 2, 2 ], [ 5, 289, 1 ], [ 5, 2, 110 ] ] }, "macro_roc_auc_ovr": 0.9988745635051176 }, "interpretation": "Removing the session aggregates costs roughly 1 percentage point of accuracy (0.998 to 0.988) and about 1.8 points of macro-F1 (0.996 to 0.978). The model is not session-dominated; the flow-level features carry the bulk of the signal." }