| from dataclasses import dataclass |
|
|
| from src.models import BenchmarkDataset |
|
|
|
|
def fields(raw_class: type) -> list:
    """Return the values of all non-dunder attributes in ``raw_class.__dict__``.

    Only the class's own namespace is scanned, so attributes inherited from
    base classes are not included. An attribute is skipped when its name
    starts or ends with a double underscore, which drops the entries that
    Python and ``@dataclass`` add (``__module__``, ``__init__``, ...).

    The filter is purely name-based: no type check is made on the values, so
    any other attribute defined on the class (including single-underscore
    names and methods) is returned as well.

    Args:
        raw_class: Class whose namespace should be scanned.

    Returns:
        The attribute values, in the order they appear in
        ``raw_class.__dict__``.
    """
    return [
        value
        for attr_name, value in raw_class.__dict__.items()
        if not (attr_name.startswith("__") or attr_name.endswith("__"))
    ]
|
|
|
|
@dataclass
class ColumnContent:
    """Describes a single leaderboard column.

    Attributes:
        name: Display name of the column. The constant lists in this module
            are built from this value.
        type: Datatype label for the column. The definitions visible in this
            module use ``"number"``, ``"markdown"`` and ``"str"``.
        displayed_by_default: Presumably whether the column is shown before
            the user changes the column selection; it is not read in the
            visible part of this module -- confirm against the UI code.
        hidden: Columns with ``hidden=True`` are left out of ``COLS`` and
            collected in ``HIDDEN_COLS``.
        never_hidden: Not read in the visible part of this module;
            presumably marks a column the user cannot deselect -- confirm
            against the UI code.
    """

    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False
|
|
|
|
@dataclass(frozen=True)
class AutoEvalColumn:
    """Column definitions for the PII Masking Benchmark leaderboard.

    Every column is an un-annotated class attribute holding a
    ``ColumnContent``. Because none of the attributes are annotated,
    ``@dataclass`` registers no dataclass fields for them; they are ordinary
    class attributes read through the class itself (for example
    ``AutoEvalColumn.rank.name``) and enumerated with this module's
    ``fields`` helper, which walks the class ``__dict__`` in declaration
    order. The order below is therefore the order of the derived column
    lists.
    """

    # Model identity and metadata.
    rank = ColumnContent(name="Rank", type="number", displayed_by_default=True, never_hidden=True)
    model = ColumnContent(name="Model", type="markdown", displayed_by_default=True, never_hidden=True)
    model_type = ColumnContent(name="Type", type="str", displayed_by_default=True, hidden=True)
    organisation = ColumnContent(name="Organisation", type="str", displayed_by_default=True)
    model_architecture = ColumnContent(name="Model Type", type="str", displayed_by_default=True)
    active_params = ColumnContent(name="Active Params (M)", type="number", displayed_by_default=True)
    total_params = ColumnContent(name="Total Params (M)", type="number", displayed_by_default=True)
    max_tokens = ColumnContent(name="Max Tokens", type="number", displayed_by_default=True)

    # Scores averaged across datasets.
    average_f2 = ColumnContent(name="Avg F2 ⬆️", type="number", displayed_by_default=True, never_hidden=True)
    average_f1 = ColumnContent(name="Avg F1", type="number", displayed_by_default=True)
    average_precision = ColumnContent(name="Avg Precision", type="number", displayed_by_default=True)
    average_recall = ColumnContent(name="Avg Recall", type="number", displayed_by_default=True)
    average_ner_strict_f1 = ColumnContent(name="Avg NER Strict F1", type="number", displayed_by_default=True)
    average_ner_exact_f1 = ColumnContent(name="Avg NER Exact F1", type="number", displayed_by_default=True)
    average_ner_partial_f1 = ColumnContent(name="Avg NER Partial F1", type="number", displayed_by_default=True)
    average_ner_type_f1 = ColumnContent(name="Avg NER Type F1", type="number", displayed_by_default=True)

    # OpenPII scores.
    openpii_f2 = ColumnContent(name="OpenPII F2", type="number", displayed_by_default=True)
    openpii_f1 = ColumnContent(name="OpenPII F1", type="number", displayed_by_default=True)
    openpii_precision = ColumnContent(name="OpenPII Precision", type="number", displayed_by_default=True)
    openpii_recall = ColumnContent(name="OpenPII Recall", type="number", displayed_by_default=True)
    openpii_ner_strict_f1 = ColumnContent(name="OpenPII NER Strict F1", type="number", displayed_by_default=True)
    openpii_ner_exact_f1 = ColumnContent(name="OpenPII NER Exact F1", type="number", displayed_by_default=True)
    openpii_ner_partial_f1 = ColumnContent(name="OpenPII NER Partial F1", type="number", displayed_by_default=True)
    openpii_ner_type_f1 = ColumnContent(name="OpenPII NER Type F1", type="number", displayed_by_default=True)

    # Gretel scores.
    gretel_f2 = ColumnContent(name="Gretel F2", type="number", displayed_by_default=True)
    gretel_f1 = ColumnContent(name="Gretel F1", type="number", displayed_by_default=True)
    gretel_precision = ColumnContent(name="Gretel Precision", type="number", displayed_by_default=True)
    gretel_recall = ColumnContent(name="Gretel Recall", type="number", displayed_by_default=True)
    gretel_ner_strict_f1 = ColumnContent(name="Gretel NER Strict F1", type="number", displayed_by_default=True)
    gretel_ner_exact_f1 = ColumnContent(name="Gretel NER Exact F1", type="number", displayed_by_default=True)
    gretel_ner_partial_f1 = ColumnContent(name="Gretel NER Partial F1", type="number", displayed_by_default=True)
    gretel_ner_type_f1 = ColumnContent(name="Gretel NER Type F1", type="number", displayed_by_default=True)

    # Nemotron-PII scores.
    nemotron_pii_f2 = ColumnContent(name="Nemotron-PII F2", type="number", displayed_by_default=True)
    nemotron_pii_f1 = ColumnContent(name="Nemotron-PII F1", type="number", displayed_by_default=True)
    nemotron_pii_precision = ColumnContent(name="Nemotron-PII Precision", type="number", displayed_by_default=True)
    nemotron_pii_recall = ColumnContent(name="Nemotron-PII Recall", type="number", displayed_by_default=True)
    nemotron_pii_ner_strict_f1 = ColumnContent(name="Nemotron-PII NER Strict F1", type="number", displayed_by_default=True)
    nemotron_pii_ner_exact_f1 = ColumnContent(name="Nemotron-PII NER Exact F1", type="number", displayed_by_default=True)
    nemotron_pii_ner_partial_f1 = ColumnContent(name="Nemotron-PII NER Partial F1", type="number", displayed_by_default=True)
    nemotron_pii_ner_type_f1 = ColumnContent(name="Nemotron-PII NER Type F1", type="number", displayed_by_default=True)

    # Privy scores.
    privy_f2 = ColumnContent(name="Privy F2", type="number", displayed_by_default=True)
    privy_f1 = ColumnContent(name="Privy F1", type="number", displayed_by_default=True)
    privy_precision = ColumnContent(name="Privy Precision", type="number", displayed_by_default=True)
    privy_recall = ColumnContent(name="Privy Recall", type="number", displayed_by_default=True)
    privy_ner_strict_f1 = ColumnContent(name="Privy NER Strict F1", type="number", displayed_by_default=True)
    privy_ner_exact_f1 = ColumnContent(name="Privy NER Exact F1", type="number", displayed_by_default=True)
    privy_ner_partial_f1 = ColumnContent(name="Privy NER Partial F1", type="number", displayed_by_default=True)
    privy_ner_type_f1 = ColumnContent(name="Privy NER Type F1", type="number", displayed_by_default=True)
|
|
|
|
# Display-name lists derived from AutoEvalColumn, in declaration order.
# COLS holds the columns not flagged hidden, HIDDEN_COLS the flagged ones,
# and ALL_COLS both.
COLS = [column.name for column in fields(AutoEvalColumn) if not column.hidden]
ALL_COLS = [column.name for column in fields(AutoEvalColumn)]
HIDDEN_COLS = [column.name for column in fields(AutoEvalColumn) if column.hidden]
|
|
| MODEL_TYPE_DISPLAY: dict[str | None, str] = { |
| None: "Token Classification", |
| "token_classification": "Token Classification", |
| "gliner": "GLiNER", |
| } |
| MODEL_TYPE_CHOICES: list[str] = sorted(set(MODEL_TYPE_DISPLAY.values())) |
|
|
# (label, threshold) pairs for a parameter-size filter. The parameter columns
# are labelled "(M)", so the thresholds are presumably in millions of
# parameters -- confirm against the filtering code.
# NOTE(review): 0 ("All") and -1 (">= 1B") do not fit the "< N" pattern and
# look like sentinels interpreted by the consumer -- verify there.
PARAM_SIZE_CHOICES: list[tuple[str, float]] = [
    ("All", 0),
    ("< 100M", 100),
    ("< 200M", 200),
    ("< 500M", 500),
    ("< 1B", 1000),
    ("\u2265 1B", -1),
]
|
|
# For each benchmark dataset, maps a metric key ("f2", "f1", "precision",
# "recall") to the display name of that dataset's leaderboard column.
TASK_TO_COLUMNS: dict[BenchmarkDataset, dict[str, str]] = {
    BenchmarkDataset.OPENPII: {
        "f2": AutoEvalColumn.openpii_f2.name,
        "f1": AutoEvalColumn.openpii_f1.name,
        "precision": AutoEvalColumn.openpii_precision.name,
        "recall": AutoEvalColumn.openpii_recall.name,
    },
    BenchmarkDataset.GRETEL: {
        "f2": AutoEvalColumn.gretel_f2.name,
        "f1": AutoEvalColumn.gretel_f1.name,
        "precision": AutoEvalColumn.gretel_precision.name,
        "recall": AutoEvalColumn.gretel_recall.name,
    },
    BenchmarkDataset.NEMOTRON_PII: {
        "f2": AutoEvalColumn.nemotron_pii_f2.name,
        "f1": AutoEvalColumn.nemotron_pii_f1.name,
        "precision": AutoEvalColumn.nemotron_pii_precision.name,
        "recall": AutoEvalColumn.nemotron_pii_recall.name,
    },
    BenchmarkDataset.PRIVY: {
        "f2": AutoEvalColumn.privy_f2.name,
        "f1": AutoEvalColumn.privy_f1.name,
        "precision": AutoEvalColumn.privy_precision.name,
        "recall": AutoEvalColumn.privy_recall.name,
    },
}
|
|
# For each benchmark dataset, maps an NER metric key ("strict_f1",
# "exact_f1", "partial_f1", "type_f1") to the display name of that dataset's
# NER leaderboard column.
TASK_TO_NER_COLUMNS: dict[BenchmarkDataset, dict[str, str]] = {
    BenchmarkDataset.OPENPII: {
        "strict_f1": AutoEvalColumn.openpii_ner_strict_f1.name,
        "exact_f1": AutoEvalColumn.openpii_ner_exact_f1.name,
        "partial_f1": AutoEvalColumn.openpii_ner_partial_f1.name,
        "type_f1": AutoEvalColumn.openpii_ner_type_f1.name,
    },
    BenchmarkDataset.GRETEL: {
        "strict_f1": AutoEvalColumn.gretel_ner_strict_f1.name,
        "exact_f1": AutoEvalColumn.gretel_ner_exact_f1.name,
        "partial_f1": AutoEvalColumn.gretel_ner_partial_f1.name,
        "type_f1": AutoEvalColumn.gretel_ner_type_f1.name,
    },
    BenchmarkDataset.NEMOTRON_PII: {
        "strict_f1": AutoEvalColumn.nemotron_pii_ner_strict_f1.name,
        "exact_f1": AutoEvalColumn.nemotron_pii_ner_exact_f1.name,
        "partial_f1": AutoEvalColumn.nemotron_pii_ner_partial_f1.name,
        "type_f1": AutoEvalColumn.nemotron_pii_ner_type_f1.name,
    },
    BenchmarkDataset.PRIVY: {
        "strict_f1": AutoEvalColumn.privy_ner_strict_f1.name,
        "exact_f1": AutoEvalColumn.privy_ner_exact_f1.name,
        "partial_f1": AutoEvalColumn.privy_ner_partial_f1.name,
        "type_f1": AutoEvalColumn.privy_ner_type_f1.name,
    },
}
|
|
# Display names of the four averaged headline metrics. They are read from the
# column definitions instead of being repeated as string literals, so a
# renamed column cannot leave this list stale.
BENCHMARK_COLS = [
    AutoEvalColumn.average_f2.name,
    AutoEvalColumn.average_f1.name,
    AutoEvalColumn.average_precision.name,
    AutoEvalColumn.average_recall.name,
]
|
|
# Display names of the descriptive model columns: organisation, architecture,
# parameter counts and maximum token count.
MODEL_DETAIL_COLS = [
    AutoEvalColumn.organisation.name,
    AutoEvalColumn.model_architecture.name,
    AutoEvalColumn.active_params.name,
    AutoEvalColumn.total_params.name,
    AutoEvalColumn.max_tokens.name,
]
|
|
# Rank and model name followed by every entry of MODEL_DETAIL_COLS.
MODEL_META_COLS = [
    AutoEvalColumn.rank.name,
    AutoEvalColumn.model.name,
    *MODEL_DETAIL_COLS,
]
|
|
# Display names of all cross-dataset average columns: F2, F1, precision and
# recall first, then the four NER F1 variants.
AVERAGE_SCORE_COLS = [
    AutoEvalColumn.average_f2.name,
    AutoEvalColumn.average_f1.name,
    AutoEvalColumn.average_precision.name,
    AutoEvalColumn.average_recall.name,
    AutoEvalColumn.average_ner_strict_f1.name,
    AutoEvalColumn.average_ner_exact_f1.name,
    AutoEvalColumn.average_ner_partial_f1.name,
    AutoEvalColumn.average_ner_type_f1.name,
]
|
|
# Display names of every per-dataset score column, grouped dataset by
# dataset. Each group lists F2, F1, precision and recall, then the four NER
# F1 variants.
TASK_SCORE_COLS = [
    # OpenPII
    AutoEvalColumn.openpii_f2.name,
    AutoEvalColumn.openpii_f1.name,
    AutoEvalColumn.openpii_precision.name,
    AutoEvalColumn.openpii_recall.name,
    AutoEvalColumn.openpii_ner_strict_f1.name,
    AutoEvalColumn.openpii_ner_exact_f1.name,
    AutoEvalColumn.openpii_ner_partial_f1.name,
    AutoEvalColumn.openpii_ner_type_f1.name,
    # Gretel
    AutoEvalColumn.gretel_f2.name,
    AutoEvalColumn.gretel_f1.name,
    AutoEvalColumn.gretel_precision.name,
    AutoEvalColumn.gretel_recall.name,
    AutoEvalColumn.gretel_ner_strict_f1.name,
    AutoEvalColumn.gretel_ner_exact_f1.name,
    AutoEvalColumn.gretel_ner_partial_f1.name,
    AutoEvalColumn.gretel_ner_type_f1.name,
    # Nemotron-PII
    AutoEvalColumn.nemotron_pii_f2.name,
    AutoEvalColumn.nemotron_pii_f1.name,
    AutoEvalColumn.nemotron_pii_precision.name,
    AutoEvalColumn.nemotron_pii_recall.name,
    AutoEvalColumn.nemotron_pii_ner_strict_f1.name,
    AutoEvalColumn.nemotron_pii_ner_exact_f1.name,
    AutoEvalColumn.nemotron_pii_ner_partial_f1.name,
    AutoEvalColumn.nemotron_pii_ner_type_f1.name,
    # Privy
    AutoEvalColumn.privy_f2.name,
    AutoEvalColumn.privy_f1.name,
    AutoEvalColumn.privy_precision.name,
    AutoEvalColumn.privy_recall.name,
    AutoEvalColumn.privy_ner_strict_f1.name,
    AutoEvalColumn.privy_ner_exact_f1.name,
    AutoEvalColumn.privy_ner_partial_f1.name,
    AutoEvalColumn.privy_ner_type_f1.name,
]
|
|
# Per-dataset groups of detail columns, keyed by the dataset's display label.
# Each group holds F1, precision, recall and the four NER F1 variants; the
# dataset's F2 column is not part of any group.
# NOTE(review): F2 is presumably left out because it is shown separately --
# confirm against the code that consumes these groups.
DATASET_METRIC_DETAIL_GROUPS: dict[str, list[str]] = {
    "OpenPII": [
        AutoEvalColumn.openpii_f1.name,
        AutoEvalColumn.openpii_precision.name,
        AutoEvalColumn.openpii_recall.name,
        AutoEvalColumn.openpii_ner_strict_f1.name,
        AutoEvalColumn.openpii_ner_exact_f1.name,
        AutoEvalColumn.openpii_ner_partial_f1.name,
        AutoEvalColumn.openpii_ner_type_f1.name,
    ],
    "Gretel": [
        AutoEvalColumn.gretel_f1.name,
        AutoEvalColumn.gretel_precision.name,
        AutoEvalColumn.gretel_recall.name,
        AutoEvalColumn.gretel_ner_strict_f1.name,
        AutoEvalColumn.gretel_ner_exact_f1.name,
        AutoEvalColumn.gretel_ner_partial_f1.name,
        AutoEvalColumn.gretel_ner_type_f1.name,
    ],
    "Nemotron-PII": [
        AutoEvalColumn.nemotron_pii_f1.name,
        AutoEvalColumn.nemotron_pii_precision.name,
        AutoEvalColumn.nemotron_pii_recall.name,
        AutoEvalColumn.nemotron_pii_ner_strict_f1.name,
        AutoEvalColumn.nemotron_pii_ner_exact_f1.name,
        AutoEvalColumn.nemotron_pii_ner_partial_f1.name,
        AutoEvalColumn.nemotron_pii_ner_type_f1.name,
    ],
    "Privy": [
        AutoEvalColumn.privy_f1.name,
        AutoEvalColumn.privy_precision.name,
        AutoEvalColumn.privy_recall.name,
        AutoEvalColumn.privy_ner_strict_f1.name,
        AutoEvalColumn.privy_ner_exact_f1.name,
        AutoEvalColumn.privy_ner_partial_f1.name,
        AutoEvalColumn.privy_ner_type_f1.name,
    ],
}


# Dataset labels in the insertion order of the groups above.
DATASET_METRIC_CHOICES: list[str] = list(DATASET_METRIC_DETAIL_GROUPS)
|
|
# Display names of the four averaged NER F1 columns (strict, exact, partial,
# type).
NER_METRIC_COLS = [
    AutoEvalColumn.average_ner_strict_f1.name,
    AutoEvalColumn.average_ner_exact_f1.name,
    AutoEvalColumn.average_ner_partial_f1.name,
    AutoEvalColumn.average_ner_type_f1.name,
]
|
|