"""Column definitions and column groupings for the PII Masking Benchmark leaderboard."""

from dataclasses import dataclass

from src.models import BenchmarkDataset


def fields(raw_class: type) -> list:
    """Return the values of all non-dunder class-level attributes of *raw_class*.

    Values are read from ``raw_class.__dict__`` in definition order, so only
    attributes defined directly on the class (not inherited ones) are included.
    An attribute is skipped when its name starts with ``__`` or ends with ``__``.
    No type filtering is applied: callers in this module rely on every
    remaining attribute being a ``ColumnContent``.
    """
    return [
        value
        for attr_name, value in raw_class.__dict__.items()
        if not attr_name.startswith("__") and not attr_name.endswith("__")
    ]


@dataclass
class ColumnContent:
    """Describes a single leaderboard column."""

    name: str  # display name; also used as the column identifier in the lists below
    type: str  # column datatype label, e.g. "number", "markdown" or "str"
    displayed_by_default: bool
    hidden: bool = False  # hidden columns are excluded from COLS and listed in HIDDEN_COLS
    never_hidden: bool = False


@dataclass(frozen=True)
class AutoEvalColumn:
    """Column definitions for the PII Masking Benchmark leaderboard.

    NOTE: the attributes below carry no type annotations, so ``@dataclass``
    does not treat them as fields; they remain plain class attributes that
    ``fields()`` reads from the class ``__dict__``. Their definition order
    determines the order of ``COLS`` / ``ALL_COLS`` / ``HIDDEN_COLS``.
    """

    # Model metadata
    rank = ColumnContent(name="Rank", type="number", displayed_by_default=True, never_hidden=True)
    model = ColumnContent(name="Model", type="markdown", displayed_by_default=True, never_hidden=True)
    model_type = ColumnContent(name="Type", type="str", displayed_by_default=True, hidden=True)
    organisation = ColumnContent(name="Organisation", type="str", displayed_by_default=True)
    model_architecture = ColumnContent(name="Model Type", type="str", displayed_by_default=True)
    active_params = ColumnContent(name="Active Params (M)", type="number", displayed_by_default=True)
    total_params = ColumnContent(name="Total Params (M)", type="number", displayed_by_default=True)
    max_tokens = ColumnContent(name="Max Tokens", type="number", displayed_by_default=True)

    # Scores averaged across datasets
    average_f2 = ColumnContent(name="Avg F2 ⬆️", type="number", displayed_by_default=True, never_hidden=True)
    average_f1 = ColumnContent(name="Avg F1", type="number", displayed_by_default=True)
    average_precision = ColumnContent(name="Avg Precision", type="number", displayed_by_default=True)
    average_recall = ColumnContent(name="Avg Recall", type="number", displayed_by_default=True)
    average_ner_strict_f1 = ColumnContent(name="Avg NER Strict F1", type="number", displayed_by_default=True)
    average_ner_exact_f1 = ColumnContent(name="Avg NER Exact F1", type="number", displayed_by_default=True)
    average_ner_partial_f1 = ColumnContent(name="Avg NER Partial F1", type="number", displayed_by_default=True)
    average_ner_type_f1 = ColumnContent(name="Avg NER Type F1", type="number", displayed_by_default=True)

    # OpenPII
    openpii_f2 = ColumnContent(name="OpenPII F2", type="number", displayed_by_default=True)
    openpii_f1 = ColumnContent(name="OpenPII F1", type="number", displayed_by_default=True)
    openpii_precision = ColumnContent(name="OpenPII Precision", type="number", displayed_by_default=True)
    openpii_recall = ColumnContent(name="OpenPII Recall", type="number", displayed_by_default=True)
    openpii_ner_strict_f1 = ColumnContent(name="OpenPII NER Strict F1", type="number", displayed_by_default=True)
    openpii_ner_exact_f1 = ColumnContent(name="OpenPII NER Exact F1", type="number", displayed_by_default=True)
    openpii_ner_partial_f1 = ColumnContent(name="OpenPII NER Partial F1", type="number", displayed_by_default=True)
    openpii_ner_type_f1 = ColumnContent(name="OpenPII NER Type F1", type="number", displayed_by_default=True)

    # Gretel
    gretel_f2 = ColumnContent(name="Gretel F2", type="number", displayed_by_default=True)
    gretel_f1 = ColumnContent(name="Gretel F1", type="number", displayed_by_default=True)
    gretel_precision = ColumnContent(name="Gretel Precision", type="number", displayed_by_default=True)
    gretel_recall = ColumnContent(name="Gretel Recall", type="number", displayed_by_default=True)
    gretel_ner_strict_f1 = ColumnContent(name="Gretel NER Strict F1", type="number", displayed_by_default=True)
    gretel_ner_exact_f1 = ColumnContent(name="Gretel NER Exact F1", type="number", displayed_by_default=True)
    gretel_ner_partial_f1 = ColumnContent(name="Gretel NER Partial F1", type="number", displayed_by_default=True)
    gretel_ner_type_f1 = ColumnContent(name="Gretel NER Type F1", type="number", displayed_by_default=True)

    # Nemotron-PII
    nemotron_pii_f2 = ColumnContent(name="Nemotron-PII F2", type="number", displayed_by_default=True)
    nemotron_pii_f1 = ColumnContent(name="Nemotron-PII F1", type="number", displayed_by_default=True)
    nemotron_pii_precision = ColumnContent(
        name="Nemotron-PII Precision", type="number", displayed_by_default=True
    )
    nemotron_pii_recall = ColumnContent(name="Nemotron-PII Recall", type="number", displayed_by_default=True)
    nemotron_pii_ner_strict_f1 = ColumnContent(
        name="Nemotron-PII NER Strict F1", type="number", displayed_by_default=True
    )
    nemotron_pii_ner_exact_f1 = ColumnContent(
        name="Nemotron-PII NER Exact F1", type="number", displayed_by_default=True
    )
    nemotron_pii_ner_partial_f1 = ColumnContent(
        name="Nemotron-PII NER Partial F1", type="number", displayed_by_default=True
    )
    nemotron_pii_ner_type_f1 = ColumnContent(
        name="Nemotron-PII NER Type F1", type="number", displayed_by_default=True
    )

    # Privy
    privy_f2 = ColumnContent(name="Privy F2", type="number", displayed_by_default=True)
    privy_f1 = ColumnContent(name="Privy F1", type="number", displayed_by_default=True)
    privy_precision = ColumnContent(name="Privy Precision", type="number", displayed_by_default=True)
    privy_recall = ColumnContent(name="Privy Recall", type="number", displayed_by_default=True)
    privy_ner_strict_f1 = ColumnContent(name="Privy NER Strict F1", type="number", displayed_by_default=True)
    privy_ner_exact_f1 = ColumnContent(name="Privy NER Exact F1", type="number", displayed_by_default=True)
    privy_ner_partial_f1 = ColumnContent(name="Privy NER Partial F1", type="number", displayed_by_default=True)
    privy_ner_type_f1 = ColumnContent(name="Privy NER Type F1", type="number", displayed_by_default=True)


# Column-name lists derived from AutoEvalColumn, in definition order.
COLS: list[str] = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
ALL_COLS: list[str] = [c.name for c in fields(AutoEvalColumn)]
HIDDEN_COLS: list[str] = [c.name for c in fields(AutoEvalColumn) if c.hidden]

# Maps a raw model-type key to its display label; a missing type (None) is
# shown with the same label as "token_classification".
MODEL_TYPE_DISPLAY: dict[str | None, str] = {
    None: "Token Classification",
    "token_classification": "Token Classification",
    "gliner": "GLiNER",
}

# Unique display labels, alphabetically sorted.
MODEL_TYPE_CHOICES: list[str] = sorted(set(MODEL_TYPE_DISPLAY.values()))

# (label, threshold) pairs for the parameter-size filter.
# NOTE(review): 0 and -1 look like sentinels for "no filter" and the
# ">= 1B" bucket respectively; confirm against the code that consumes them.
PARAM_SIZE_CHOICES: list[tuple[str, float]] = [
    ("All", 0),
    ("< 100M", 100),
    ("< 200M", 200),
    ("< 500M", 500),
    ("< 1B", 1000),
    ("\u2265 1B", -1),
]

# Per-dataset mapping from metric key to the leaderboard column that shows it.
TASK_TO_COLUMNS: dict[BenchmarkDataset, dict[str, str]] = {
    BenchmarkDataset.OPENPII: {
        "f2": AutoEvalColumn.openpii_f2.name,
        "f1": AutoEvalColumn.openpii_f1.name,
        "precision": AutoEvalColumn.openpii_precision.name,
        "recall": AutoEvalColumn.openpii_recall.name,
    },
    BenchmarkDataset.GRETEL: {
        "f2": AutoEvalColumn.gretel_f2.name,
        "f1": AutoEvalColumn.gretel_f1.name,
        "precision": AutoEvalColumn.gretel_precision.name,
        "recall": AutoEvalColumn.gretel_recall.name,
    },
    BenchmarkDataset.NEMOTRON_PII: {
        "f2": AutoEvalColumn.nemotron_pii_f2.name,
        "f1": AutoEvalColumn.nemotron_pii_f1.name,
        "precision": AutoEvalColumn.nemotron_pii_precision.name,
        "recall": AutoEvalColumn.nemotron_pii_recall.name,
    },
    BenchmarkDataset.PRIVY: {
        "f2": AutoEvalColumn.privy_f2.name,
        "f1": AutoEvalColumn.privy_f1.name,
        "precision": AutoEvalColumn.privy_precision.name,
        "recall": AutoEvalColumn.privy_recall.name,
    },
}

# Per-dataset mapping from NER metric key to the leaderboard column that shows it.
TASK_TO_NER_COLUMNS: dict[BenchmarkDataset, dict[str, str]] = {
    BenchmarkDataset.OPENPII: {
        "strict_f1": AutoEvalColumn.openpii_ner_strict_f1.name,
        "exact_f1": AutoEvalColumn.openpii_ner_exact_f1.name,
        "partial_f1": AutoEvalColumn.openpii_ner_partial_f1.name,
        "type_f1": AutoEvalColumn.openpii_ner_type_f1.name,
    },
    BenchmarkDataset.GRETEL: {
        "strict_f1": AutoEvalColumn.gretel_ner_strict_f1.name,
        "exact_f1": AutoEvalColumn.gretel_ner_exact_f1.name,
        "partial_f1": AutoEvalColumn.gretel_ner_partial_f1.name,
        "type_f1": AutoEvalColumn.gretel_ner_type_f1.name,
    },
    BenchmarkDataset.NEMOTRON_PII: {
        "strict_f1": AutoEvalColumn.nemotron_pii_ner_strict_f1.name,
        "exact_f1": AutoEvalColumn.nemotron_pii_ner_exact_f1.name,
        "partial_f1": AutoEvalColumn.nemotron_pii_ner_partial_f1.name,
        "type_f1": AutoEvalColumn.nemotron_pii_ner_type_f1.name,
    },
    BenchmarkDataset.PRIVY: {
        "strict_f1": AutoEvalColumn.privy_ner_strict_f1.name,
        "exact_f1": AutoEvalColumn.privy_ner_exact_f1.name,
        "partial_f1": AutoEvalColumn.privy_ner_partial_f1.name,
        "type_f1": AutoEvalColumn.privy_ner_type_f1.name,
    },
}

# Headline averaged metrics. Referenced through AutoEvalColumn (rather than
# repeated as string literals) so a renamed column cannot drift out of sync.
BENCHMARK_COLS: list[str] = [
    AutoEvalColumn.average_f2.name,
    AutoEvalColumn.average_f1.name,
    AutoEvalColumn.average_precision.name,
    AutoEvalColumn.average_recall.name,
]

MODEL_DETAIL_COLS: list[str] = [
    AutoEvalColumn.organisation.name,
    AutoEvalColumn.model_architecture.name,
    AutoEvalColumn.active_params.name,
    AutoEvalColumn.total_params.name,
    AutoEvalColumn.max_tokens.name,
]

# Rank and model name followed by the model detail columns.
MODEL_META_COLS: list[str] = [
    AutoEvalColumn.rank.name,
    AutoEvalColumn.model.name,
    *MODEL_DETAIL_COLS,
]

AVERAGE_SCORE_COLS: list[str] = [
    AutoEvalColumn.average_f2.name,
    AutoEvalColumn.average_f1.name,
    AutoEvalColumn.average_precision.name,
    AutoEvalColumn.average_recall.name,
    AutoEvalColumn.average_ner_strict_f1.name,
    AutoEvalColumn.average_ner_exact_f1.name,
    AutoEvalColumn.average_ner_partial_f1.name,
    AutoEvalColumn.average_ner_type_f1.name,
]

# Every per-dataset score column, grouped by dataset.
TASK_SCORE_COLS: list[str] = [
    AutoEvalColumn.openpii_f2.name,
    AutoEvalColumn.openpii_f1.name,
    AutoEvalColumn.openpii_precision.name,
    AutoEvalColumn.openpii_recall.name,
    AutoEvalColumn.openpii_ner_strict_f1.name,
    AutoEvalColumn.openpii_ner_exact_f1.name,
    AutoEvalColumn.openpii_ner_partial_f1.name,
    AutoEvalColumn.openpii_ner_type_f1.name,
    AutoEvalColumn.gretel_f2.name,
    AutoEvalColumn.gretel_f1.name,
    AutoEvalColumn.gretel_precision.name,
    AutoEvalColumn.gretel_recall.name,
    AutoEvalColumn.gretel_ner_strict_f1.name,
    AutoEvalColumn.gretel_ner_exact_f1.name,
    AutoEvalColumn.gretel_ner_partial_f1.name,
    AutoEvalColumn.gretel_ner_type_f1.name,
    AutoEvalColumn.nemotron_pii_f2.name,
    AutoEvalColumn.nemotron_pii_f1.name,
    AutoEvalColumn.nemotron_pii_precision.name,
    AutoEvalColumn.nemotron_pii_recall.name,
    AutoEvalColumn.nemotron_pii_ner_strict_f1.name,
    AutoEvalColumn.nemotron_pii_ner_exact_f1.name,
    AutoEvalColumn.nemotron_pii_ner_partial_f1.name,
    AutoEvalColumn.nemotron_pii_ner_type_f1.name,
    AutoEvalColumn.privy_f2.name,
    AutoEvalColumn.privy_f1.name,
    AutoEvalColumn.privy_precision.name,
    AutoEvalColumn.privy_recall.name,
    AutoEvalColumn.privy_ner_strict_f1.name,
    AutoEvalColumn.privy_ner_exact_f1.name,
    AutoEvalColumn.privy_ner_partial_f1.name,
    AutoEvalColumn.privy_ner_type_f1.name,
]

# Per-dataset detail columns, keyed by dataset display name. Each group lists
# every score column of that dataset except its F2 column.
DATASET_METRIC_DETAIL_GROUPS: dict[str, list[str]] = {
    "OpenPII": [
        AutoEvalColumn.openpii_f1.name,
        AutoEvalColumn.openpii_precision.name,
        AutoEvalColumn.openpii_recall.name,
        AutoEvalColumn.openpii_ner_strict_f1.name,
        AutoEvalColumn.openpii_ner_exact_f1.name,
        AutoEvalColumn.openpii_ner_partial_f1.name,
        AutoEvalColumn.openpii_ner_type_f1.name,
    ],
    "Gretel": [
        AutoEvalColumn.gretel_f1.name,
        AutoEvalColumn.gretel_precision.name,
        AutoEvalColumn.gretel_recall.name,
        AutoEvalColumn.gretel_ner_strict_f1.name,
        AutoEvalColumn.gretel_ner_exact_f1.name,
        AutoEvalColumn.gretel_ner_partial_f1.name,
        AutoEvalColumn.gretel_ner_type_f1.name,
    ],
    "Nemotron-PII": [
        AutoEvalColumn.nemotron_pii_f1.name,
        AutoEvalColumn.nemotron_pii_precision.name,
        AutoEvalColumn.nemotron_pii_recall.name,
        AutoEvalColumn.nemotron_pii_ner_strict_f1.name,
        AutoEvalColumn.nemotron_pii_ner_exact_f1.name,
        AutoEvalColumn.nemotron_pii_ner_partial_f1.name,
        AutoEvalColumn.nemotron_pii_ner_type_f1.name,
    ],
    "Privy": [
        AutoEvalColumn.privy_f1.name,
        AutoEvalColumn.privy_precision.name,
        AutoEvalColumn.privy_recall.name,
        AutoEvalColumn.privy_ner_strict_f1.name,
        AutoEvalColumn.privy_ner_exact_f1.name,
        AutoEvalColumn.privy_ner_partial_f1.name,
        AutoEvalColumn.privy_ner_type_f1.name,
    ],
}

# Dataset display names, in the insertion order of the groups above.
DATASET_METRIC_CHOICES: list[str] = list(DATASET_METRIC_DETAIL_GROUPS)

NER_METRIC_COLS: list[str] = [
    AutoEvalColumn.average_ner_strict_f1.name,
    AutoEvalColumn.average_ner_exact_f1.name,
    AutoEvalColumn.average_ner_partial_f1.name,
    AutoEvalColumn.average_ner_type_f1.name,
]