cyb009-baseline-classifier / validation_results.json
pradeep-xpert's picture
Initial release: vulnerability_class baseline + comprehensive 8-oracle-path leakage diagnostic on CYB009 sample
e520bf1 verified
{
"version": "1.0.0",
"dataset": "xpertsystems/cyb009-sample",
"task": "8-class vulnerability_class classification (CWE-style families)",
"baselines": {
"always_predict_majority_accuracy": 0.17676767676767677,
"majority_class": "memory_corruption",
"random_guess_accuracy": 0.125
},
"split": {
"strategy": "stratified (StratifiedShuffleSplit, nested 70/15/15)",
"rationale": "Per-vulnerability task (n=2638), one row per vuln. Stratified random splitting preserves class distribution. No row-correlation structure to leak.",
"vulns_train": 1846,
"vulns_val": 396,
"vulns_test": 396,
"seed": 42
},
"n_features": 57,
"label_classes": [
"auth_access_control",
"cryptographic_failure",
"information_disclosure",
"injection_family",
"logic_flaw",
"memory_corruption",
"misconfiguration",
"supply_chain_weakness"
],
"class_distribution_train": {
"memory_corruption": 325,
"injection_family": 305,
"misconfiguration": 305,
"auth_access_control": 245,
"cryptographic_failure": 211,
"supply_chain_weakness": 189,
"logic_flaw": 160,
"information_disclosure": 106
},
"class_distribution_test": {
"memory_corruption": 70,
"misconfiguration": 65,
"injection_family": 65,
"auth_access_control": 53,
"cryptographic_failure": 45,
"supply_chain_weakness": 41,
"logic_flaw": 34,
"information_disclosure": 23
},
"outcome_leak_excluded_features": [
"exploit_maturity_final (indirect leak via CVSS temporal multiplier)",
"cvss_temporal_score_final (near-deterministic per exploit_maturity_final tier)",
"time_to_exploit_days (sentinel -1 / positive)",
"time_to_remediate_days (sentinel 120 / lower)",
"patch_lag_days (suspected similar sentinel - precaution)",
"risk_score_composite (computed from flag fields - precaution)"
],
"leakage_audit_note": "CYB009 has the most pervasive structural leakage of any SKU in the XpertSystems catalog. See leakage_diagnostic.json for the full 8-oracle-path audit. Six of the README's headline use cases are unlearnable on the sample after honest leak removal; vulnerability_class is the only viable target and gives the catalog's weakest baseline by design.",
"models": {
"xgboost": {
"architecture": "Gradient-boosted decision trees, multi:softprob, 8 classes",
"framework": "xgboost",
"test_metrics": {
"model": "xgboost",
"accuracy": 0.23737373737373738,
"macro_f1": 0.22437482872901052,
"weighted_f1": 0.23213786276177156,
"per_class_f1": {
"auth_access_control": 0.14583333333333334,
"cryptographic_failure": 0.21686746987951808,
"information_disclosure": 0.2909090909090909,
"injection_family": 0.23728813559322035,
"logic_flaw": 0.08955223880597014,
"memory_corruption": 0.3333333333333333,
"misconfiguration": 0.2589928057553957,
"supply_chain_weakness": 0.2222222222222222
},
"confusion_matrix": {
"labels": [
"auth_access_control",
"cryptographic_failure",
"information_disclosure",
"injection_family",
"logic_flaw",
"memory_corruption",
"misconfiguration",
"supply_chain_weakness"
],
"matrix": [
[
7,
7,
0,
11,
6,
10,
7,
5
],
[
4,
9,
3,
5,
3,
5,
16,
0
],
[
3,
0,
8,
1,
4,
0,
7,
0
],
[
3,
6,
1,
14,
8,
20,
6,
7
],
[
4,
4,
5,
3,
3,
2,
13,
0
],
[
11,
3,
0,
13,
3,
27,
5,
8
],
[
6,
9,
15,
2,
5,
7,
18,
3
],
[
5,
0,
0,
4,
1,
21,
2,
8
]
]
},
"macro_roc_auc_ovr": 0.6837125710196055
}
},
"mlp": {
"architecture": "PyTorch MLP, 57 -> 128 -> 64 -> 8, BatchNorm1d + ReLU + Dropout, weighted cross-entropy loss",
"framework": "pytorch",
"test_metrics": {
"model": "mlp",
"accuracy": 0.23232323232323232,
"macro_f1": 0.22092024769409177,
"weighted_f1": 0.22940625794114217,
"per_class_f1": {
"auth_access_control": 0.16279069767441862,
"cryptographic_failure": 0.16842105263157894,
"information_disclosure": 0.15384615384615385,
"injection_family": 0.23529411764705882,
"logic_flaw": 0.22784810126582278,
"memory_corruption": 0.36486486486486486,
"misconfiguration": 0.16216216216216217,
"supply_chain_weakness": 0.29213483146067415
},
"confusion_matrix": {
"labels": [
"auth_access_control",
"cryptographic_failure",
"information_disclosure",
"injection_family",
"logic_flaw",
"memory_corruption",
"misconfiguration",
"supply_chain_weakness"
],
"matrix": [
[
7,
8,
1,
12,
6,
12,
4,
3
],
[
5,
8,
4,
3,
5,
5,
14,
1
],
[
1,
3,
5,
2,
5,
1,
6,
0
],
[
3,
7,
3,
14,
6,
17,
2,
13
],
[
1,
5,
9,
3,
9,
1,
6,
0
],
[
8,
7,
0,
9,
3,
27,
2,
14
],
[
3,
10,
20,
5,
10,
4,
9,
4
],
[
5,
2,
0,
6,
1,
11,
3,
13
]
]
},
"macro_roc_auc_ovr": 0.6899177016524518
}
}
}
}