{ "purpose": "With n=100 campaigns, single-seed metrics carry test-fold variance. Multi-seed evaluation gives a more reliable picture.", "seeds_evaluated": [ 42, 7, 13, 17, 23, 31, 45, 99, 123, 200 ], "per_seed": [ { "seed": 42, "test_n_classes": 7, "accuracy": 0.6547008547008547, "macro_f1": 0.6401276666852063, "macro_roc_auc_ovr": 0.935584434710217 }, { "seed": 7, "test_n_classes": 7, "accuracy": 0.6267123287671232, "macro_f1": 0.6141815367358149, "macro_roc_auc_ovr": 0.9256987657069029 }, { "seed": 13, "test_n_classes": 7, "accuracy": 0.5983050847457627, "macro_f1": 0.5953435905708684, "macro_roc_auc_ovr": 0.9235372520169014 }, { "seed": 17, "test_n_classes": 7, "accuracy": 0.64349376114082, "macro_f1": 0.6328717716731788, "macro_roc_auc_ovr": 0.9426545946495839 }, { "seed": 23, "test_n_classes": 7, "accuracy": 0.5915254237288136, "macro_f1": 0.5734921834318393, "macro_roc_auc_ovr": 0.9245031023094512 }, { "seed": 31, "test_n_classes": 7, "accuracy": 0.6220095693779905, "macro_f1": 0.6103022022937624, "macro_roc_auc_ovr": 0.9325576570435162 }, { "seed": 45, "test_n_classes": 7, "accuracy": 0.6678082191780822, "macro_f1": 0.655097964659693, "macro_roc_auc_ovr": 0.9396074000285977 }, { "seed": 99, "test_n_classes": 7, "accuracy": 0.7111111111111111, "macro_f1": 0.7136854710276727, "macro_roc_auc_ovr": 0.9538147161172963 }, { "seed": 123, "test_n_classes": 7, "accuracy": 0.6823734729493892, "macro_f1": 0.6727927606720584, "macro_roc_auc_ovr": 0.9443324151480283 }, { "seed": 200, "test_n_classes": 7, "accuracy": 0.6931407942238267, "macro_f1": 0.6752712902262269, "macro_roc_auc_ovr": 0.9450377543018418 } ], "aggregate": { "accuracy_mean": 0.6491180619923773, "accuracy_std": 0.03799334369624316, "accuracy_min": 0.5915254237288136, "accuracy_max": 0.7111111111111111, "macro_f1_mean": 0.638316643797632, "macro_f1_std": 0.039956794294168915, "roc_auc_mean": 0.9367328092032338, "roc_auc_std": 0.009623085359130642 }, "published_artifact_seed": 42 }