validation_results.json · xpertsystems/cyb009-baseline-classifier at main

File size: 7,372 Bytes

e520bf1

{
  "version": "1.0.0",
  "dataset": "xpertsystems/cyb009-sample",
  "task": "8-class vulnerability_class classification (CWE-style families)",
  "baselines": {
    "always_predict_majority_accuracy": 0.17676767676767677,
    "majority_class": "memory_corruption",
    "random_guess_accuracy": 0.125
  },
  "split": {
    "strategy": "stratified (StratifiedShuffleSplit, nested 70/15/15)",
    "rationale": "Per-vulnerability task (n=2638), one row per vuln. Stratified random splitting preserves class distribution. No row-correlation structure to leak.",
    "vulns_train": 1846,
    "vulns_val": 396,
    "vulns_test": 396,
    "seed": 42
  },
  "n_features": 57,
  "label_classes": [
    "auth_access_control",
    "cryptographic_failure",
    "information_disclosure",
    "injection_family",
    "logic_flaw",
    "memory_corruption",
    "misconfiguration",
    "supply_chain_weakness"
  ],
  "class_distribution_train": {
    "memory_corruption": 325,
    "injection_family": 305,
    "misconfiguration": 305,
    "auth_access_control": 245,
    "cryptographic_failure": 211,
    "supply_chain_weakness": 189,
    "logic_flaw": 160,
    "information_disclosure": 106
  },
  "class_distribution_test": {
    "memory_corruption": 70,
    "misconfiguration": 65,
    "injection_family": 65,
    "auth_access_control": 53,
    "cryptographic_failure": 45,
    "supply_chain_weakness": 41,
    "logic_flaw": 34,
    "information_disclosure": 23
  },
  "outcome_leak_excluded_features": [
    "exploit_maturity_final (indirect leak via CVSS temporal multiplier)",
    "cvss_temporal_score_final (near-deterministic per exploit_maturity_final tier)",
    "time_to_exploit_days (sentinel -1 / positive)",
    "time_to_remediate_days (sentinel 120 / lower)",
    "patch_lag_days (suspected similar sentinel - precaution)",
    "risk_score_composite (computed from flag fields - precaution)"
  ],
  "leakage_audit_note": "CYB009 has the most pervasive structural leakage of any SKU in the XpertSystems catalog. See leakage_diagnostic.json for the full 8-oracle-path audit. Six of the README's headline use cases are unlearnable on the sample after honest leak removal; vulnerability_class is the only viable target and gives the catalog's weakest baseline by design.",
  "models": {
    "xgboost": {
      "architecture": "Gradient-boosted decision trees, multi:softprob, 8 classes",
      "framework": "xgboost",
      "test_metrics": {
        "model": "xgboost",
        "accuracy": 0.23737373737373738,
        "macro_f1": 0.22437482872901052,
        "weighted_f1": 0.23213786276177156,
        "per_class_f1": {
          "auth_access_control": 0.14583333333333334,
          "cryptographic_failure": 0.21686746987951808,
          "information_disclosure": 0.2909090909090909,
          "injection_family": 0.23728813559322035,
          "logic_flaw": 0.08955223880597014,
          "memory_corruption": 0.3333333333333333,
          "misconfiguration": 0.2589928057553957,
          "supply_chain_weakness": 0.2222222222222222
        },
        "confusion_matrix": {
          "labels": [
            "auth_access_control",
            "cryptographic_failure",
            "information_disclosure",
            "injection_family",
            "logic_flaw",
            "memory_corruption",
            "misconfiguration",
            "supply_chain_weakness"
          ],
          "matrix": [
            [
              7,
              7,
              0,
              11,
              6,
              10,
              7,
              5
            ],
            [
              4,
              9,
              3,
              5,
              3,
              5,
              16,
              0
            ],
            [
              3,
              0,
              8,
              1,
              4,
              0,
              7,
              0
            ],
            [
              3,
              6,
              1,
              14,
              8,
              20,
              6,
              7
            ],
            [
              4,
              4,
              5,
              3,
              3,
              2,
              13,
              0
            ],
            [
              11,
              3,
              0,
              13,
              3,
              27,
              5,
              8
            ],
            [
              6,
              9,
              15,
              2,
              5,
              7,
              18,
              3
            ],
            [
              5,
              0,
              0,
              4,
              1,
              21,
              2,
              8
            ]
          ]
        },
        "macro_roc_auc_ovr": 0.6837125710196055
      }
    },
    "mlp": {
      "architecture": "PyTorch MLP, 57 -> 128 -> 64 -> 8, BatchNorm1d + ReLU + Dropout, weighted cross-entropy loss",
      "framework": "pytorch",
      "test_metrics": {
        "model": "mlp",
        "accuracy": 0.23232323232323232,
        "macro_f1": 0.22092024769409177,
        "weighted_f1": 0.22940625794114217,
        "per_class_f1": {
          "auth_access_control": 0.16279069767441862,
          "cryptographic_failure": 0.16842105263157894,
          "information_disclosure": 0.15384615384615385,
          "injection_family": 0.23529411764705882,
          "logic_flaw": 0.22784810126582278,
          "memory_corruption": 0.36486486486486486,
          "misconfiguration": 0.16216216216216217,
          "supply_chain_weakness": 0.29213483146067415
        },
        "confusion_matrix": {
          "labels": [
            "auth_access_control",
            "cryptographic_failure",
            "information_disclosure",
            "injection_family",
            "logic_flaw",
            "memory_corruption",
            "misconfiguration",
            "supply_chain_weakness"
          ],
          "matrix": [
            [
              7,
              8,
              1,
              12,
              6,
              12,
              4,
              3
            ],
            [
              5,
              8,
              4,
              3,
              5,
              5,
              14,
              1
            ],
            [
              1,
              3,
              5,
              2,
              5,
              1,
              6,
              0
            ],
            [
              3,
              7,
              3,
              14,
              6,
              17,
              2,
              13
            ],
            [
              1,
              5,
              9,
              3,
              9,
              1,
              6,
              0
            ],
            [
              8,
              7,
              0,
              9,
              3,
              27,
              2,
              14
            ],
            [
              3,
              10,
              20,
              5,
              10,
              4,
              9,
              4
            ],
            [
              5,
              2,
              0,
              6,
              1,
              11,
              3,
              13
            ]
          ]
        },
        "macro_roc_auc_ovr": 0.6899177016524518
      }
    }
  }
}