# camille-vanhoffelen
# feat: switched up model param filters
# 4563cbc
from dataclasses import dataclass
from src.models import BenchmarkDataset
def fields(raw_class: type) -> list:
    """Collect the non-dunder attribute values stored directly on a class.

    Walks ``raw_class.__dict__`` and keeps every value whose attribute name
    neither starts nor ends with a double underscore. Only the class's own
    namespace is inspected, so attributes inherited from base classes are not
    returned.

    Args:
        raw_class: Class whose own attributes should be listed.

    Returns:
        The retained attribute values, in ``raw_class.__dict__`` iteration
        order.
    """
    collected = []
    for attr_name, attr_value in raw_class.__dict__.items():
        # A leading *or* a trailing "__" is enough to skip the entry.
        if attr_name.startswith("__") or attr_name.endswith("__"):
            continue
        collected.append(attr_value)
    return collected
@dataclass
class ColumnContent:
    """Describes a single leaderboard column.

    The dataclass decorator generates the constructor from the fields below,
    in this order; the last two fields are optional and default to False.
    """

    # Column header text; the module-level column lists are built from these names.
    name: str
    # Column datatype label (values used in this module: "number", "markdown", "str").
    type: str
    # NOTE(review): presumably controls whether the column is shown initially;
    # it is not read anywhere in this file — confirm against the UI code.
    displayed_by_default: bool
    # Columns with hidden=True are left out of COLS and collected in HIDDEN_COLS.
    hidden: bool = False
    # NOTE(review): presumably marks columns that cannot be toggled off; it is
    # not read anywhere in this file — confirm against the UI code.
    never_hidden: bool = False
@dataclass(frozen=True)
class AutoEvalColumn:
    """Column definitions for the PII Masking Benchmark leaderboard.

    Every column is a plain class attribute holding a ColumnContent. None of
    the attributes carries a type annotation, so the dataclass decorator
    registers no dataclass fields; the columns are read straight off the class
    (for example through the module-level ``fields`` helper). Declaration
    order matters: ``fields`` yields the columns in the order written here.
    """

    # --- Model identity and metadata ---
    rank = ColumnContent(name="Rank", type="number", displayed_by_default=True, never_hidden=True)
    model = ColumnContent(name="Model", type="markdown", displayed_by_default=True, never_hidden=True)
    # The only column declared with hidden=True.
    model_type = ColumnContent(name="Type", type="str", displayed_by_default=True, hidden=True)
    organisation = ColumnContent(name="Organisation", type="str", displayed_by_default=True)
    # NOTE(review): displayed as "Model Type", whereas model_type above is displayed as "Type".
    model_architecture = ColumnContent(name="Model Type", type="str", displayed_by_default=True)
    active_params = ColumnContent(name="Active Params (M)", type="number", displayed_by_default=True)
    total_params = ColumnContent(name="Total Params (M)", type="number", displayed_by_default=True)
    max_tokens = ColumnContent(name="Max Tokens", type="number", displayed_by_default=True)

    # --- Average ("Avg") score columns ---
    average_f2 = ColumnContent(name="Avg F2 ⬆️", type="number", displayed_by_default=True, never_hidden=True)
    average_f1 = ColumnContent(name="Avg F1", type="number", displayed_by_default=True)
    average_precision = ColumnContent(name="Avg Precision", type="number", displayed_by_default=True)
    average_recall = ColumnContent(name="Avg Recall", type="number", displayed_by_default=True)
    average_ner_strict_f1 = ColumnContent(name="Avg NER Strict F1", type="number", displayed_by_default=True)
    average_ner_exact_f1 = ColumnContent(name="Avg NER Exact F1", type="number", displayed_by_default=True)
    average_ner_partial_f1 = ColumnContent(name="Avg NER Partial F1", type="number", displayed_by_default=True)
    average_ner_type_f1 = ColumnContent(name="Avg NER Type F1", type="number", displayed_by_default=True)

    # --- OpenPII columns ---
    openpii_f2 = ColumnContent(name="OpenPII F2", type="number", displayed_by_default=True)
    openpii_f1 = ColumnContent(name="OpenPII F1", type="number", displayed_by_default=True)
    openpii_precision = ColumnContent(name="OpenPII Precision", type="number", displayed_by_default=True)
    openpii_recall = ColumnContent(name="OpenPII Recall", type="number", displayed_by_default=True)
    openpii_ner_strict_f1 = ColumnContent(name="OpenPII NER Strict F1", type="number", displayed_by_default=True)
    openpii_ner_exact_f1 = ColumnContent(name="OpenPII NER Exact F1", type="number", displayed_by_default=True)
    openpii_ner_partial_f1 = ColumnContent(name="OpenPII NER Partial F1", type="number", displayed_by_default=True)
    openpii_ner_type_f1 = ColumnContent(name="OpenPII NER Type F1", type="number", displayed_by_default=True)

    # --- Gretel columns ---
    gretel_f2 = ColumnContent(name="Gretel F2", type="number", displayed_by_default=True)
    gretel_f1 = ColumnContent(name="Gretel F1", type="number", displayed_by_default=True)
    gretel_precision = ColumnContent(name="Gretel Precision", type="number", displayed_by_default=True)
    gretel_recall = ColumnContent(name="Gretel Recall", type="number", displayed_by_default=True)
    gretel_ner_strict_f1 = ColumnContent(name="Gretel NER Strict F1", type="number", displayed_by_default=True)
    gretel_ner_exact_f1 = ColumnContent(name="Gretel NER Exact F1", type="number", displayed_by_default=True)
    gretel_ner_partial_f1 = ColumnContent(name="Gretel NER Partial F1", type="number", displayed_by_default=True)
    gretel_ner_type_f1 = ColumnContent(name="Gretel NER Type F1", type="number", displayed_by_default=True)

    # --- Nemotron-PII columns ---
    nemotron_pii_f2 = ColumnContent(name="Nemotron-PII F2", type="number", displayed_by_default=True)
    nemotron_pii_f1 = ColumnContent(name="Nemotron-PII F1", type="number", displayed_by_default=True)
    nemotron_pii_precision = ColumnContent(name="Nemotron-PII Precision", type="number", displayed_by_default=True)
    nemotron_pii_recall = ColumnContent(name="Nemotron-PII Recall", type="number", displayed_by_default=True)
    nemotron_pii_ner_strict_f1 = ColumnContent(name="Nemotron-PII NER Strict F1", type="number", displayed_by_default=True)
    nemotron_pii_ner_exact_f1 = ColumnContent(name="Nemotron-PII NER Exact F1", type="number", displayed_by_default=True)
    nemotron_pii_ner_partial_f1 = ColumnContent(name="Nemotron-PII NER Partial F1", type="number", displayed_by_default=True)
    nemotron_pii_ner_type_f1 = ColumnContent(name="Nemotron-PII NER Type F1", type="number", displayed_by_default=True)

    # --- Privy columns ---
    privy_f2 = ColumnContent(name="Privy F2", type="number", displayed_by_default=True)
    privy_f1 = ColumnContent(name="Privy F1", type="number", displayed_by_default=True)
    privy_precision = ColumnContent(name="Privy Precision", type="number", displayed_by_default=True)
    privy_recall = ColumnContent(name="Privy Recall", type="number", displayed_by_default=True)
    privy_ner_strict_f1 = ColumnContent(name="Privy NER Strict F1", type="number", displayed_by_default=True)
    privy_ner_exact_f1 = ColumnContent(name="Privy NER Exact F1", type="number", displayed_by_default=True)
    privy_ner_partial_f1 = ColumnContent(name="Privy NER Partial F1", type="number", displayed_by_default=True)
    privy_ner_type_f1 = ColumnContent(name="Privy NER Type F1", type="number", displayed_by_default=True)
# Resolve the column definitions once, then derive the three name lists:
# every column, the non-hidden ones, and the hidden ones.
_ALL_COLUMNS = fields(AutoEvalColumn)
ALL_COLS = [column.name for column in _ALL_COLUMNS]
COLS = [column.name for column in _ALL_COLUMNS if not column.hidden]
HIDDEN_COLS = [column.name for column in _ALL_COLUMNS if column.hidden]
MODEL_TYPE_DISPLAY: dict[str | None, str] = {
None: "Token Classification",
"token_classification": "Token Classification",
"gliner": "GLiNER",
}
MODEL_TYPE_CHOICES: list[str] = sorted(set(MODEL_TYPE_DISPLAY.values()))
# (label, value) pairs for the parameter-size choices. The value paired with
# each "< N" label is N expressed in millions.
# NOTE(review): 0 ("All") and -1 (the ">= 1B" entry) look like sentinels that
# the consuming filter code interprets — confirm there.
PARAM_SIZE_CHOICES: list[tuple[str, float]] = list(
    {
        "All": 0,
        "< 100M": 100,
        "< 200M": 200,
        "< 500M": 500,
        "< 1B": 1000,
        "\u2265 1B": -1,
    }.items()
)
# For each benchmark dataset, maps a metric key ("f2", "f1", "precision",
# "recall") to the display name of the matching AutoEvalColumn attribute,
# which is named "<dataset prefix>_<metric>".
TASK_TO_COLUMNS: dict[BenchmarkDataset, dict[str, str]] = {
    dataset: {
        metric: getattr(AutoEvalColumn, f"{prefix}_{metric}").name
        for metric in ("f2", "f1", "precision", "recall")
    }
    for dataset, prefix in (
        (BenchmarkDataset.OPENPII, "openpii"),
        (BenchmarkDataset.GRETEL, "gretel"),
        (BenchmarkDataset.NEMOTRON_PII, "nemotron_pii"),
        (BenchmarkDataset.PRIVY, "privy"),
    )
}
# For each benchmark dataset, maps an NER metric key ("strict_f1", "exact_f1",
# "partial_f1", "type_f1") to the display name of the matching AutoEvalColumn
# attribute, which is named "<dataset prefix>_ner_<metric>".
TASK_TO_NER_COLUMNS: dict[BenchmarkDataset, dict[str, str]] = {
    dataset: {
        metric: getattr(AutoEvalColumn, f"{prefix}_ner_{metric}").name
        for metric in ("strict_f1", "exact_f1", "partial_f1", "type_f1")
    }
    for dataset, prefix in (
        (BenchmarkDataset.OPENPII, "openpii"),
        (BenchmarkDataset.GRETEL, "gretel"),
        (BenchmarkDataset.NEMOTRON_PII, "nemotron_pii"),
        (BenchmarkDataset.PRIVY, "privy"),
    )
}
# Display names of the four headline average columns (F2, F1, precision,
# recall). They are taken from the AutoEvalColumn definitions, like the other
# column lists in this module, so that renaming a column cannot leave a stale
# hard-coded label behind.
BENCHMARK_COLS = [
    AutoEvalColumn.average_f2.name,
    AutoEvalColumn.average_f1.name,
    AutoEvalColumn.average_precision.name,
    AutoEvalColumn.average_recall.name,
]
# Display names of the descriptive model columns: organisation, architecture,
# active/total parameter counts and max tokens.
MODEL_DETAIL_COLS = [
    column.name
    for column in (
        AutoEvalColumn.organisation,
        AutoEvalColumn.model_architecture,
        AutoEvalColumn.active_params,
        AutoEvalColumn.total_params,
        AutoEvalColumn.max_tokens,
    )
]

# Rank and model name first, followed by every detail column.
MODEL_META_COLS = [AutoEvalColumn.rank.name, AutoEvalColumn.model.name] + MODEL_DETAIL_COLS
# Display names of all "Avg" columns: the four masking metrics followed by
# the four NER F1 variants.
AVERAGE_SCORE_COLS = [
    column.name
    for column in (
        AutoEvalColumn.average_f2,
        AutoEvalColumn.average_f1,
        AutoEvalColumn.average_precision,
        AutoEvalColumn.average_recall,
        AutoEvalColumn.average_ner_strict_f1,
        AutoEvalColumn.average_ner_exact_f1,
        AutoEvalColumn.average_ner_partial_f1,
        AutoEvalColumn.average_ner_type_f1,
    )
]
# Display names of every per-dataset score column. Datasets are visited in
# the order OpenPII, Gretel, Nemotron-PII, Privy; within each dataset the four
# masking metrics come first, then the four NER F1 variants. Each name is read
# from the AutoEvalColumn attribute called "<dataset prefix>_<metric>".
TASK_SCORE_COLS = [
    getattr(AutoEvalColumn, f"{prefix}_{metric}").name
    for prefix in ("openpii", "gretel", "nemotron_pii", "privy")
    for metric in (
        "f2",
        "f1",
        "precision",
        "recall",
        "ner_strict_f1",
        "ner_exact_f1",
        "ner_partial_f1",
        "ner_type_f1",
    )
]
# Dataset label -> display names of that dataset's detail columns (F1,
# precision, recall and the four NER F1 variants). The dataset's F2 column is
# not part of these groups. Each name is read from the AutoEvalColumn
# attribute called "<dataset prefix>_<metric>".
DATASET_METRIC_DETAIL_GROUPS: dict[str, list[str]] = {
    label: [
        getattr(AutoEvalColumn, f"{prefix}_{metric}").name
        for metric in (
            "f1",
            "precision",
            "recall",
            "ner_strict_f1",
            "ner_exact_f1",
            "ner_partial_f1",
            "ner_type_f1",
        )
    ]
    for label, prefix in (
        ("OpenPII", "openpii"),
        ("Gretel", "gretel"),
        ("Nemotron-PII", "nemotron_pii"),
        ("Privy", "privy"),
    )
}

# Dataset labels, in the same order as the groups above.
DATASET_METRIC_CHOICES: list[str] = list(DATASET_METRIC_DETAIL_GROUPS)
# Display names of the four average NER F1 columns (strict, exact, partial, type).
NER_METRIC_COLS = [
    column.name
    for column in (
        AutoEvalColumn.average_ner_strict_f1,
        AutoEvalColumn.average_ner_exact_f1,
        AutoEvalColumn.average_ner_partial_f1,
        AutoEvalColumn.average_ner_type_f1,
    )
]