File size: 11,206 Bytes
{
"purpose": "CYB010 sample has extensive structural leakage in two places: the per-event phase/profile labels are oracled by the mitre_tactic == 'benign' marker and the threat_actor_id == 'NONE' marker (both perfect benign indicators), and the per-alert label_true_positive target is oracled by SEVEN separate columns: alert_category, label_false_positive (a mirror of the target), the time_to_detect_seconds sentinel, the correlated_chain_length sentinel, analyst_triage_priority, suppression_reason, and alert_rule_name. The published baseline (attack_lifecycle_phase 5-class) trains with the four phase oracles excluded.",
"primary_target": "attack_lifecycle_phase (5-class, per-event)",
"split": "GroupShuffleSplit on incident_id, 70/15/15 nested",
"oracle_paths_documented": {
"P1_mitre_tactic_benign": {
"target": "attack_lifecycle_phase == 'benign_background'",
"leak_column": "mitre_tactic",
"mechanism": "All events with mitre_tactic == 'benign' are in benign_background phase; all events in benign_background have mitre_tactic == 'benign'. Perfect bidirectional oracle (12,448 of 12,448 cases).",
"evidence_counts": {
"tactic_benign_AND_phase_benign": 12448,
"tactic_benign_AND_phase_other": 0,
"tactic_attack_AND_phase_benign": 0
},
"verdict": "Perfect oracle for benign_background phase."
},
"P2_mitre_technique_id": {
"target": "mitre_tactic",
"leak_column": "mitre_technique_id",
"mechanism": "In real ATT&CK a technique (T-number) can belong to more than one tactic, but in this sample every technique maps to exactly one: 100% of techniques in the sample (54 of 54) map deterministically to a single tactic. Indirect oracle for phase via the mitre_tactic chain.",
"evidence": {
"n_unique_techniques": 54,
"techniques_mapping_to_single_tactic": 54,
"percent_oracle": 100.0
},
"verdict": "Perfect oracle for mitre_tactic; indirect for phase."
},
"P3_label_malicious": {
"target": "attack_lifecycle_phase == 'benign_background'",
"leak_column": "label_malicious",
"mechanism": "label_malicious is False if and only if the event is in benign_background phase. Perfect bidirectional encoding.",
"evidence_counts": {
"label_malicious_False_AND_phase_benign": 12448,
"label_malicious_False_AND_phase_other": 0
},
"verdict": "Perfect oracle for benign_background phase."
},
"P4_threat_actor_id_NONE": {
"target": "attack_lifecycle_phase == 'benign_background'",
"leak_column": "threat_actor_id",
"mechanism": "threat_actor_id has 11 values: 10 ACTOR-XXXX labels (one per malicious actor) plus 'NONE' for benign events. threat_actor_id == 'NONE' is a perfect oracle for benign phase; the 10 ACTOR-XXXX values are perfect oracles for non-benign phase.",
"evidence_counts": {
"actor_NONE_AND_phase_benign": 12448,
"actor_NONE_AND_phase_other": 0
},
"verdict": "Perfect oracle for benign_background phase."
},
"P5_threat_actor_profile_benign": {
"target": "attack_lifecycle_phase == 'benign_background'",
"leak_column": "threat_actor_profile",
"mechanism": "threat_actor_profile == 'benign_user' is a perfect oracle for benign_background phase. The 4 non-benign profile values (apt, nation_state, insider, script_kiddie) all indicate non-benign phase.",
"evidence_counts": {
"profile_benign_user_AND_phase_benign": 12448
},
"verdict": "Perfect oracle for benign_background phase."
},
"P6_event_type_phase": {
"target": "attack_lifecycle_phase (multiple phases)",
"leak_column": "event_type",
"mechanism": "Many event_type values are phase-specific. For example, 'c2_beacon_outbound' (6,158 events) maps to exfiltration_or_impact with 95.1% purity. Other event types similarly map to specific phases.",
"near_oracle_event_types": {
"c2_beacon_outbound": {
"maps_to": "exfiltration_or_impact",
"purity": 0.9514,
"n_events": 6158
},
"credential_dumping_attempt": {
"maps_to": "benign_background",
"purity": 0.9518,
"n_events": 166
},
"process_hollowing_detected": {
"maps_to": "benign_background",
"purity": 0.9527,
"n_events": 169
}
},
"n_event_types_with_purity_above_95pct": 3,
"verdict": "Strong near-oracle for multiple phases. Dropped."
},
"A1_alert_category_FP_noise": {
"target": "label_true_positive (alerts)",
"leak_column": "alert_category",
"mechanism": "alert_category == 'false_positive_noise' is a perfect oracle for label_true_positive == False (2,721 of 2,721 noise alerts are FP; all 14 other categories are 100% TP).",
"verdict": "Perfect oracle."
},
"A2_label_false_positive_mirror": {
"target": "label_true_positive (alerts)",
"leak_column": "label_false_positive",
"mechanism": "label_false_positive is exactly NOT label_true_positive (verified across all 5,162 alerts). Same target.",
"verdict": "Perfect oracle (mirror target)."
},
"A3_time_to_detect_sentinel": {
"target": "label_true_positive (alerts)",
"leak_column": "time_to_detect_seconds",
"mechanism": "FP alerts have time_to_detect_seconds == 0 (sentinel for 'no detection time because it's a false positive'). TP alerts have detection times ranging 240 to 2,592,000 seconds. Perfect oracle.",
"evidence": {
"FP_alerts_time_zero": 2721,
"TP_alerts_time_zero": 0
},
"verdict": "Perfect oracle."
},
"A4_correlated_chain_sentinel": {
"target": "label_true_positive (alerts)",
"leak_column": "correlated_chain_length",
"mechanism": "FP alerts always have correlated_chain_length == 1 (no correlation possible because false positives don't chain). TP alerts have chain length 1-20 with mean 3.14. Perfect oracle when chain_length > 1; chain_length == 1 still allows some TPs.",
"verdict": "Strong oracle - chain_length > 1 perfectly identifies TP."
},
"A5_analyst_triage_priority": {
"target": "label_true_positive (alerts)",
"leak_column": "analyst_triage_priority",
"mechanism": "P1, P2, P3 priorities are 100% TP (1,609 alerts total). P4 splits 77% FP / 23% TP (2,721 FP vs 832 TP). The P1/P2/P3 indicator alone is a perfect oracle for TP within those alerts.",
"evidence_counts": {
"P1": {
"false": 0,
"true": 131
},
"P2": {
"false": 0,
"true": 432
},
"P3": {
"false": 0,
"true": 1046
},
"P4": {
"false": 2721,
"true": 832
}
},
"verdict": "Strong oracle (perfect for P1/P2/P3)."
},
"A6_suppression_reason": {
"target": "label_true_positive (alerts)",
"leak_column": "suppression_reason",
"mechanism": "A NaN suppression_reason implies the alert is TP (1,744 of 1,744 NaN values are TP), but the converse does not hold: alerts with a non-NaN suppression reason are 79-82% FP, so some TPs also carry a reason. Strong oracle.",
"verdict": "Strong oracle."
},
"A7_alert_rule_name": {
"target": "label_true_positive (alerts)",
"leak_column": "alert_rule_name",
"mechanism": "alert_rule_name often encodes the answer (rules with 'false_positive' or 'noise' in name map deterministically to FP; rules with attack-specific names map to TP).",
"verdict": "Strong oracle by rule naming convention."
}
},
"unlearnable_targets": [
{
"target": "threat_actor_profile 4-class (malicious events only)",
"n_classes": 4,
"n_events": 9448,
"majority_baseline": 0.6110287891617273,
"honest_accuracy": 0.5543902985277928,
"honest_roc_auc": 0.7473176763614474,
"verdict": "below_majority",
"note": "After filtering to malicious events only and dropping all phase/tactic oracles, threat actor attribution is below majority baseline. The 5-class formulation works only because benign_user separation is trivial (which is a structural oracle finding)."
},
{
"target": "event_class 12-class (per-event)",
"n_classes": 12,
"majority_baseline": 0.4211728169528681,
"honest_accuracy": 0.3508069868328931,
"verdict": "below_majority",
"note": "event_class is a structural property of the event itself (e.g. network_flow, authentication, endpoint_process) and is not learnable from other features without leaking event_type."
}
],
"alert_task_findings": {
"task": "label_true_positive binary on alert_records (5,162 alerts)",
"with_oracles_intact_accuracy": 1.0,
"with_oracles_intact_note": "100% test accuracy with any single oracle column present",
"honest_accuracy_mean_3seeds": 0.7636892643739505,
"honest_roc_auc_mean_3seeds": 0.8541442200259074,
"majority_baseline": 0.5271212708252615,
"interpretation": "After dropping all 7 oracle columns, honest XGBoost achieves acc 0.764 and AUC 0.854 on the alert TP task - real signal from severity_level, siem_platform_type, suppressed_flag, and host context features. This is a viable secondary task but is NOT the published baseline (the per-event attack_lifecycle_phase task is)."
},
"unlearnable_summary": "Two README-suggested targets are unlearnable on the sample after honest oracle removal: threat_actor_profile 4-class (malicious-only) and event_class 12-class. The 5-class threat_actor_profile WITH benign included is technically viable (acc 0.84) but per-class F1 reveals it's almost entirely driven by benign_user separation (F1 1.00 vs F1 0.17-0.69 for the 4 malicious classes). Hence the published primary target is attack_lifecycle_phase 5-class.",
"recommendations_to_dataset_author": [
"Remove the threat_actor_id == 'NONE' sentinel for benign events. Use a per-event mask or a separate benign-actor pool with realistic actor IDs.",
"Replace the mitre_tactic == 'benign' marker with phase-specific tactic distributions (e.g. benign events should sample from realistic non-malicious tactic-free patterns, not all share a 'benign' value).",
"Make event_type less deterministic per phase. 'c2_beacon_outbound' should appear in a few different phases with phase-specific frequencies, rather than being concentrated (~95%) in exfiltration_or_impact.",
"Replace time_to_detect_seconds == 0 sentinel for FP alerts with realistic detection-time distributions; FP alerts can still have a 'time to detection' value (the time to dismiss).",
"Replace correlated_chain_length == 1 sentinel for FP with occasional 2-3 chains (real noise sometimes correlates).",
"Replace analyst_triage_priority P1/P2/P3 -> 100% TP with realistic uncertainty; some P1 alerts are FPs in real data.",
"Make alert_category values and alert_rule_name names less revealing - a category value like 'false_positive_noise', or a rule name containing 'false_positive' or 'noise', deterministically encodes the label. Use abstract category and rule IDs and have the FP label come from outcome statistics, not the name.",
"To enable threat_actor_profile 4-class learning, add stronger per-actor feature signatures - APT vs nation_state should have distinct host targeting, dwell time per host, and log_source affinity. Current overlap is too tight."
]
}