{ "purpose": "With n=100 samples and 10 classes, single-seed metrics carry test-fold variance. Multi-seed evaluation gives a more reliable performance picture.", "seeds_evaluated": [ 42, 7, 13, 17, 23, 31, 45, 99, 123, 200 ], "per_seed": [ { "seed": 42, "test_n_classes": 10, "accuracy": 0.9177777777777778, "macro_f1": 0.7780699645112974, "macro_roc_auc_ovr": 0.979171667321058 }, { "seed": 7, "test_n_classes": 10, "accuracy": 0.8988888888888888, "macro_f1": 0.7959031264581272, "macro_roc_auc_ovr": 0.9762003477988086 }, { "seed": 13, "test_n_classes": 10, "accuracy": 0.9077777777777778, "macro_f1": 0.7844193419282306, "macro_roc_auc_ovr": 0.9756039083537456 }, { "seed": 17, "test_n_classes": 10, "accuracy": 0.9055555555555556, "macro_f1": 0.7793567708150484, "macro_roc_auc_ovr": 0.9725864270053698 }, { "seed": 23, "test_n_classes": 10, "accuracy": 0.9011111111111111, "macro_f1": 0.7669056364325609, "macro_roc_auc_ovr": 0.9731577510354572 }, { "seed": 31, "test_n_classes": 10, "accuracy": 0.9055555555555556, "macro_f1": 0.7825811291140096, "macro_roc_auc_ovr": 0.9757878099386051 }, { "seed": 45, "test_n_classes": 10, "accuracy": 0.9211111111111111, "macro_f1": 0.8065645535880511, "macro_roc_auc_ovr": 0.9754272516460774 }, { "seed": 99, "test_n_classes": 10, "accuracy": 0.8822222222222222, "macro_f1": 0.7589855352578547, "macro_roc_auc_ovr": 0.9722896806606615 }, { "seed": 123, "test_n_classes": 10, "accuracy": 0.9088888888888889, "macro_f1": 0.7938334664931561, "macro_roc_auc_ovr": 0.9790976919379577 }, { "seed": 200, "test_n_classes": 10, "accuracy": 0.8977777777777778, "macro_f1": 0.7938099428748325, "macro_roc_auc_ovr": 0.9734976569094487 } ], "aggregate": { "accuracy_mean": 0.9046666666666667, "accuracy_std": 0.010337514088544894, "accuracy_min": 0.8822222222222222, "accuracy_max": 0.9211111111111111, "macro_f1_mean": 0.7840429467473169, "macro_f1_std": 0.013493004664905476, "roc_auc_mean": 0.9752820192607189, "roc_auc_std": 0.0023415667609269276 }, "published_artifact_seed": 42 }