Initial release: attack_lifecycle_phase 5-class baseline + 11-oracle-path leakage diagnostic
e2c4702 verified | { | |
| "purpose": "CYB010 sample has extensive structural leakage in two places: the per-event phase/profile labels are oracled by the mitre_tactic == 'benign' marker and the threat_actor_id == 'NONE' marker (both perfect benign indicators), and the per-alert label_true_positive target is oracled by SEVEN separate columns: the alert_category, label_false_positive, alert_rule_name, time_to_detect_seconds sentinel, correlated_chain_length sentinel, analyst_triage_priority, and suppression_reason fields. The published baseline (attack_lifecycle_phase 5-class) trains with the four phase oracles excluded.", | |
| "primary_target": "attack_lifecycle_phase (5-class, per-event)", | |
| "split": "GroupShuffleSplit on incident_id, 70/15/15 nested", | |
| "oracle_paths_documented": { | |
| "P1_mitre_tactic_benign": { | |
| "target": "attack_lifecycle_phase == 'benign_background'", | |
| "leak_column": "mitre_tactic", | |
| "mechanism": "All events with mitre_tactic == 'benign' are in benign_background phase; all events in benign_background have mitre_tactic == 'benign'. Perfect bidirectional oracle (12,448 of 12,448 cases).", | |
| "evidence_counts": { | |
| "tactic_benign_AND_phase_benign": 12448, | |
| "tactic_benign_AND_phase_other": 0, | |
| "tactic_attack_AND_phase_benign": 0 | |
| }, | |
| "verdict": "Perfect oracle for benign_background phase." | |
| }, | |
| "P2_mitre_technique_id": { | |
| "target": "mitre_tactic", | |
| "leak_column": "mitre_technique_id", | |
| "mechanism": "In the full ATT&CK matrix a technique (T-number) can be listed under more than one tactic, but in this sample each technique appears under exactly one: 100% of techniques in the sample (54 of 54) map deterministically to a single tactic. Indirect oracle for phase via the mitre_tactic chain.", | |
| "evidence": { | |
| "n_unique_techniques": 54, | |
| "techniques_mapping_to_single_tactic": 54, | |
| "percent_oracle": 100.0 | |
| }, | |
| "verdict": "Perfect oracle for mitre_tactic; indirect for phase." | |
| }, | |
| "P3_label_malicious": { | |
| "target": "attack_lifecycle_phase == 'benign_background'", | |
| "leak_column": "label_malicious", | |
| "mechanism": "label_malicious is False if and only if the event is in benign_background phase. Perfect bidirectional encoding.", | |
| "evidence_counts": { | |
| "label_malicious_False_AND_phase_benign": 12448, | |
| "label_malicious_False_AND_phase_other": 0 | |
| }, | |
| "verdict": "Perfect oracle for benign_background phase." | |
| }, | |
| "P4_threat_actor_id_NONE": { | |
| "target": "attack_lifecycle_phase == 'benign_background'", | |
| "leak_column": "threat_actor_id", | |
| "mechanism": "threat_actor_id has 11 values: 10 ACTOR-XXXX labels (one per malicious actor) plus 'NONE' for benign events. threat_actor_id == 'NONE' is a perfect oracle for benign phase; the 10 ACTOR-XXXX values are perfect oracles for non-benign phase.", | |
| "evidence_counts": { | |
| "actor_NONE_AND_phase_benign": 12448, | |
| "actor_NONE_AND_phase_other": 0 | |
| }, | |
| "verdict": "Perfect oracle for benign_background phase." | |
| }, | |
| "P5_threat_actor_profile_benign": { | |
| "target": "attack_lifecycle_phase == 'benign_background'", | |
| "leak_column": "threat_actor_profile", | |
| "mechanism": "threat_actor_profile == 'benign_user' is a perfect oracle for benign_background phase. The 4 non-benign profile values (apt, nation_state, insider, script_kiddie) all indicate non-benign phase.", | |
| "evidence_counts": { | |
| "profile_benign_user_AND_phase_benign": 12448 | |
| }, | |
| "verdict": "Perfect oracle for benign_background phase." | |
| }, | |
| "P6_event_type_phase": { | |
| "target": "attack_lifecycle_phase (multiple phases)", | |
| "leak_column": "event_type", | |
| "mechanism": "Many event_type values are phase-specific. For example, 'c2_beacon_outbound' (6,158 events) maps to exfiltration_or_impact with 95.1% purity. Other event types similarly map to specific phases.", | |
| "near_oracle_event_types": { | |
| "c2_beacon_outbound": { | |
| "maps_to": "exfiltration_or_impact", | |
| "purity": 0.9514, | |
| "n_events": 6158 | |
| }, | |
| "credential_dumping_attempt": { | |
| "maps_to": "benign_background", | |
| "purity": 0.9518, | |
| "n_events": 166 | |
| }, | |
| "process_hollowing_detected": { | |
| "maps_to": "benign_background", | |
| "purity": 0.9527, | |
| "n_events": 169 | |
| } | |
| }, | |
| "n_event_types_with_purity_above_95pct": 3, | |
| "verdict": "Strong near-oracle for multiple phases. Dropped." | |
| }, | |
| "A1_alert_category_FP_noise": { | |
| "target": "label_true_positive (alerts)", | |
| "leak_column": "alert_category", | |
| "mechanism": "alert_category == 'false_positive_noise' is a perfect oracle for label_true_positive == False (2,721 of 2,721 noise alerts are FP; all 14 other categories are 100% TP).", | |
| "verdict": "Perfect oracle." | |
| }, | |
| "A2_label_false_positive_mirror": { | |
| "target": "label_true_positive (alerts)", | |
| "leak_column": "label_false_positive", | |
| "mechanism": "label_false_positive is exactly NOT label_true_positive (verified across all 5,162 alerts). Same target.", | |
| "verdict": "Perfect oracle (mirror target)." | |
| }, | |
| "A3_time_to_detect_sentinel": { | |
| "target": "label_true_positive (alerts)", | |
| "leak_column": "time_to_detect_seconds", | |
| "mechanism": "FP alerts have time_to_detect_seconds == 0 (sentinel for 'no detection time because it's a false positive'). TP alerts have detection times ranging 240 to 2,592,000 seconds. Perfect oracle.", | |
| "evidence": { | |
| "FP_alerts_time_zero": 2721, | |
| "TP_alerts_time_zero": 0 | |
| }, | |
| "verdict": "Perfect oracle." | |
| }, | |
| "A4_correlated_chain_sentinel": { | |
| "target": "label_true_positive (alerts)", | |
| "leak_column": "correlated_chain_length", | |
| "mechanism": "FP alerts always have correlated_chain_length == 1 (no correlation possible because false positives don't chain). TP alerts have chain length 1-20 with mean 3.14. Perfect oracle when chain_length > 1; chain_length == 1 still allows some TPs.", | |
| "verdict": "Strong oracle - chain_length > 1 perfectly identifies TP." | |
| }, | |
| "A5_analyst_triage_priority": { | |
| "target": "label_true_positive (alerts)", | |
| "leak_column": "analyst_triage_priority", | |
| "mechanism": "P1, P2, P3 priorities are 100% TP (1,609 alerts total). P4 splits 77% FP / 23% TP (2,721 FP vs 832 TP). The P1/P2/P3 indicator alone is a perfect oracle for TP within those alerts.", | |
| "evidence_counts": { | |
| "P1": { | |
| "false": 0, | |
| "true": 131 | |
| }, | |
| "P2": { | |
| "false": 0, | |
| "true": 432 | |
| }, | |
| "P3": { | |
| "false": 0, | |
| "true": 1046 | |
| }, | |
| "P4": { | |
| "false": 2721, | |
| "true": 832 | |
| } | |
| }, | |
| "verdict": "Strong oracle (perfect for P1/P2/P3)." | |
| }, | |
| "A6_suppression_reason": { | |
| "target": "label_true_positive (alerts)", | |
| "leak_column": "suppression_reason", | |
| "mechanism": "suppression_reason is NaN only for TP alerts (1,744 of 1,744 NaN values are TP), but the converse does not hold: some TP alerts carry a non-NaN suppression reason. Any non-NaN suppression reason is 79-82% FP. Strong oracle.", | |
| "verdict": "Strong oracle." | |
| }, | |
| "A7_alert_rule_name": { | |
| "target": "label_true_positive (alerts)", | |
| "leak_column": "alert_rule_name", | |
| "mechanism": "alert_rule_name often encodes the answer (rules with 'false_positive' or 'noise' in name map deterministically to FP; rules with attack-specific names map to TP).", | |
| "verdict": "Strong oracle by rule naming convention." | |
| } | |
| }, | |
| "unlearnable_targets": [ | |
| { | |
| "target": "threat_actor_profile 4-class (malicious events only)", | |
| "n_classes": 4, | |
| "n_events": 9448, | |
| "majority_baseline": 0.6110287891617273, | |
| "honest_accuracy": 0.5543902985277928, | |
| "honest_roc_auc": 0.7473176763614474, | |
| "verdict": "below_majority", | |
| "note": "After filtering to malicious events only and dropping all phase/tactic oracles, threat actor attribution is below majority baseline. The 5-class formulation works only because benign_user separation is trivial (which is a structural oracle finding)." | |
| }, | |
| { | |
| "target": "event_class 12-class (per-event)", | |
| "n_classes": 12, | |
| "majority_baseline": 0.4211728169528681, | |
| "honest_accuracy": 0.3508069868328931, | |
| "verdict": "below_majority", | |
| "note": "event_class is a structural property of the event itself (e.g. network_flow, authentication, endpoint_process) and is not learnable from other features without leaking event_type." | |
| } | |
| ], | |
| "alert_task_findings": { | |
| "task": "label_true_positive binary on alert_records (5,162 alerts)", | |
| "with_oracles_intact_accuracy": 1.0, | |
| "with_oracles_intact_note": "100% test accuracy with any single oracle column present", | |
| "honest_accuracy_mean_3seeds": 0.7636892643739505, | |
| "honest_roc_auc_mean_3seeds": 0.8541442200259074, | |
| "majority_baseline": 0.5271212708252615, | |
| "interpretation": "After dropping all 7 oracle columns, honest XGBoost achieves acc 0.764 and AUC 0.854 on the alert TP task - real signal from severity_level, siem_platform_type, suppressed_flag, and host context features. This is a viable secondary task but is NOT the published baseline (the per-event attack_lifecycle_phase task is)." | |
| }, | |
| "unlearnable_summary": "Two README-suggested targets are unlearnable on the sample after honest oracle removal: threat_actor_profile 4-class (malicious-only) and event_class 12-class. The 5-class threat_actor_profile WITH benign included is technically viable (acc 0.84) but per-class F1 reveals it's almost entirely driven by benign_user separation (F1 1.00 vs F1 0.17-0.69 for the 4 malicious classes). Hence the published primary target is attack_lifecycle_phase 5-class.", | |
| "recommendations_to_dataset_author": [ | |
| "Remove the threat_actor_id == 'NONE' sentinel for benign events. Use a per-event mask or a separate benign-actor pool with realistic actor IDs.", | |
| "Replace the mitre_tactic == 'benign' marker with phase-specific tactic distributions (e.g. benign events should sample from realistic non-malicious tactic-free patterns, not all share a 'benign' value).", | |
| "Make event_type less deterministic per phase. 'c2_beacon_outbound' should appear in a few different phases with phase-specific frequencies, not ~95% concentrated in exfiltration_or_impact.", | |
| "Replace time_to_detect_seconds == 0 sentinel for FP alerts with realistic detection-time distributions; FP alerts can still have a 'time to detection' value (the time to dismiss).", | |
| "Replace correlated_chain_length == 1 sentinel for FP with occasional 2-3 chains (real noise sometimes correlates).", | |
| "Replace analyst_triage_priority P1/P2/P3 -> 100% TP with realistic uncertainty; some P1 alerts are FPs in real data.", | |
| "Make alert_category and alert_rule_name values less revealing - the 'false_positive_noise' category and rule names containing 'false_positive' or 'noise' deterministically encode the label. Use abstract category and rule IDs and have the FP label come from outcome statistics, not the category or rule name.", | |
| "To enable threat_actor_profile 4-class learning, add stronger per-actor feature signatures - APT vs nation_state should have distinct host targeting, dwell time per host, and log_source affinity. Current overlap is too tight." | |
| ] | |
| } |