| { |
| "purpose": "With n=100 samples and 10 classes, single-seed metrics carry test-fold variance. Multi-seed evaluation gives a more reliable performance picture.", |
| "seeds_evaluated": [ |
| 42, |
| 7, |
| 13, |
| 17, |
| 23, |
| 31, |
| 45, |
| 99, |
| 123, |
| 200 |
| ], |
| "per_seed": [ |
| { |
| "seed": 42, |
| "test_n_classes": 10, |
| "accuracy": 0.9177777777777778, |
| "macro_f1": 0.7780699645112974, |
| "macro_roc_auc_ovr": 0.979171667321058 |
| }, |
| { |
| "seed": 7, |
| "test_n_classes": 10, |
| "accuracy": 0.8988888888888888, |
| "macro_f1": 0.7959031264581272, |
| "macro_roc_auc_ovr": 0.9762003477988086 |
| }, |
| { |
| "seed": 13, |
| "test_n_classes": 10, |
| "accuracy": 0.9077777777777778, |
| "macro_f1": 0.7844193419282306, |
| "macro_roc_auc_ovr": 0.9756039083537456 |
| }, |
| { |
| "seed": 17, |
| "test_n_classes": 10, |
| "accuracy": 0.9055555555555556, |
| "macro_f1": 0.7793567708150484, |
| "macro_roc_auc_ovr": 0.9725864270053698 |
| }, |
| { |
| "seed": 23, |
| "test_n_classes": 10, |
| "accuracy": 0.9011111111111111, |
| "macro_f1": 0.7669056364325609, |
| "macro_roc_auc_ovr": 0.9731577510354572 |
| }, |
| { |
| "seed": 31, |
| "test_n_classes": 10, |
| "accuracy": 0.9055555555555556, |
| "macro_f1": 0.7825811291140096, |
| "macro_roc_auc_ovr": 0.9757878099386051 |
| }, |
| { |
| "seed": 45, |
| "test_n_classes": 10, |
| "accuracy": 0.9211111111111111, |
| "macro_f1": 0.8065645535880511, |
| "macro_roc_auc_ovr": 0.9754272516460774 |
| }, |
| { |
| "seed": 99, |
| "test_n_classes": 10, |
| "accuracy": 0.8822222222222222, |
| "macro_f1": 0.7589855352578547, |
| "macro_roc_auc_ovr": 0.9722896806606615 |
| }, |
| { |
| "seed": 123, |
| "test_n_classes": 10, |
| "accuracy": 0.9088888888888889, |
| "macro_f1": 0.7938334664931561, |
| "macro_roc_auc_ovr": 0.9790976919379577 |
| }, |
| { |
| "seed": 200, |
| "test_n_classes": 10, |
| "accuracy": 0.8977777777777778, |
| "macro_f1": 0.7938099428748325, |
| "macro_roc_auc_ovr": 0.9734976569094487 |
| } |
| ], |
| "aggregate": { |
| "accuracy_mean": 0.9046666666666667, |
| "accuracy_std": 0.010337514088544894, |
| "accuracy_min": 0.8822222222222222, |
| "accuracy_max": 0.9211111111111111, |
| "macro_f1_mean": 0.7840429467473169, |
| "macro_f1_std": 0.013493004664905476, |
| "roc_auc_mean": 0.9752820192607189, |
| "roc_auc_std": 0.0023415667609269276 |
| }, |
| "published_artifact_seed": 42 |
| } |