{ "purpose": "Multi-seed evaluation across 10 stratified splits of the 2,638-vulnerability sample.", "seeds_evaluated": [ 42, 7, 13, 17, 23, 31, 45, 99, 123, 200 ], "per_seed": [ { "seed": 42, "test_n_classes": 8, "accuracy": 0.23737373737373738, "macro_f1": 0.22437482872901052, "macro_roc_auc_ovr": 0.6837125710196055 }, { "seed": 7, "test_n_classes": 8, "accuracy": 0.2222222222222222, "macro_f1": 0.2093010862619929, "macro_roc_auc_ovr": 0.6598529124901316 }, { "seed": 13, "test_n_classes": 8, "accuracy": 0.2398989898989899, "macro_f1": 0.2307013362941505, "macro_roc_auc_ovr": 0.6859754559014113 }, { "seed": 17, "test_n_classes": 8, "accuracy": 0.2828282828282828, "macro_f1": 0.2641998881222478, "macro_roc_auc_ovr": 0.7001133264273626 }, { "seed": 23, "test_n_classes": 8, "accuracy": 0.22474747474747475, "macro_f1": 0.20938909311730927, "macro_roc_auc_ovr": 0.6952258894131303 }, { "seed": 31, "test_n_classes": 8, "accuracy": 0.25252525252525254, "macro_f1": 0.23228517698591994, "macro_roc_auc_ovr": 0.6868917272897719 }, { "seed": 45, "test_n_classes": 8, "accuracy": 0.2601010101010101, "macro_f1": 0.23328085381091487, "macro_roc_auc_ovr": 0.6955734168438206 }, { "seed": 99, "test_n_classes": 8, "accuracy": 0.21717171717171718, "macro_f1": 0.2064102665659866, "macro_roc_auc_ovr": 0.700000049204532 }, { "seed": 123, "test_n_classes": 8, "accuracy": 0.2222222222222222, "macro_f1": 0.20983049912880922, "macro_roc_auc_ovr": 0.662519489088299 }, { "seed": 200, "test_n_classes": 8, "accuracy": 0.2828282828282828, "macro_f1": 0.2801905278759914, "macro_roc_auc_ovr": 0.6954305041778505 } ], "aggregate": { "accuracy_mean": 0.2441919191919192, "accuracy_std": 0.023337760304165702, "accuracy_min": 0.21717171717171718, "accuracy_max": 0.2828282828282828, "macro_f1_mean": 0.22999635568923332, "macro_f1_std": 0.023565611735295866, "roc_auc_mean": 0.6865295341855916, "roc_auc_std": 0.013780848086567432 }, "published_artifact_seed": 42 }