Initial release: vulnerability_class baseline + comprehensive 8-oracle-path leakage diagnostic on CYB009 sample
e520bf1 verified | { | |
| "purpose": "Multi-seed evaluation across 10 stratified splits of the 2,638-vulnerability sample.", | |
| "seeds_evaluated": [ | |
| 42, | |
| 7, | |
| 13, | |
| 17, | |
| 23, | |
| 31, | |
| 45, | |
| 99, | |
| 123, | |
| 200 | |
| ], | |
| "per_seed": [ | |
| { | |
| "seed": 42, | |
| "test_n_classes": 8, | |
| "accuracy": 0.23737373737373738, | |
| "macro_f1": 0.22437482872901052, | |
| "macro_roc_auc_ovr": 0.6837125710196055 | |
| }, | |
| { | |
| "seed": 7, | |
| "test_n_classes": 8, | |
| "accuracy": 0.2222222222222222, | |
| "macro_f1": 0.2093010862619929, | |
| "macro_roc_auc_ovr": 0.6598529124901316 | |
| }, | |
| { | |
| "seed": 13, | |
| "test_n_classes": 8, | |
| "accuracy": 0.2398989898989899, | |
| "macro_f1": 0.2307013362941505, | |
| "macro_roc_auc_ovr": 0.6859754559014113 | |
| }, | |
| { | |
| "seed": 17, | |
| "test_n_classes": 8, | |
| "accuracy": 0.2828282828282828, | |
| "macro_f1": 0.2641998881222478, | |
| "macro_roc_auc_ovr": 0.7001133264273626 | |
| }, | |
| { | |
| "seed": 23, | |
| "test_n_classes": 8, | |
| "accuracy": 0.22474747474747475, | |
| "macro_f1": 0.20938909311730927, | |
| "macro_roc_auc_ovr": 0.6952258894131303 | |
| }, | |
| { | |
| "seed": 31, | |
| "test_n_classes": 8, | |
| "accuracy": 0.25252525252525254, | |
| "macro_f1": 0.23228517698591994, | |
| "macro_roc_auc_ovr": 0.6868917272897719 | |
| }, | |
| { | |
| "seed": 45, | |
| "test_n_classes": 8, | |
| "accuracy": 0.2601010101010101, | |
| "macro_f1": 0.23328085381091487, | |
| "macro_roc_auc_ovr": 0.6955734168438206 | |
| }, | |
| { | |
| "seed": 99, | |
| "test_n_classes": 8, | |
| "accuracy": 0.21717171717171718, | |
| "macro_f1": 0.2064102665659866, | |
| "macro_roc_auc_ovr": 0.700000049204532 | |
| }, | |
| { | |
| "seed": 123, | |
| "test_n_classes": 8, | |
| "accuracy": 0.2222222222222222, | |
| "macro_f1": 0.20983049912880922, | |
| "macro_roc_auc_ovr": 0.662519489088299 | |
| }, | |
| { | |
| "seed": 200, | |
| "test_n_classes": 8, | |
| "accuracy": 0.2828282828282828, | |
| "macro_f1": 0.2801905278759914, | |
| "macro_roc_auc_ovr": 0.6954305041778505 | |
| } | |
| ], | |
| "aggregate": { | |
| "accuracy_mean": 0.2441919191919192, | |
| "accuracy_std": 0.023337760304165702, | |
| "accuracy_min": 0.21717171717171718, | |
| "accuracy_max": 0.2828282828282828, | |
| "macro_f1_mean": 0.22999635568923332, | |
| "macro_f1_std": 0.023565611735295866, | |
| "roc_auc_mean": 0.6865295341855916, | |
| "roc_auc_std": 0.013780848086567432 | |
| }, | |
| "published_artifact_seed": 42 | |
| } |