"""
HaremPiiConfig — subclass of OpenAIPrivacyFilterConfig that:
* sets `model_type="haremb_pii"` (so AutoConfig + auto_map dispatch works
with `trust_remote_code=True`)
* paired with HaremPiiForTokenClassification in modeling_haremb_pii.py
via `auto_map`
This release is a 1-layer surgical slice of the OpenMed teacher:
* num_hidden_layers=1
* inference-only — Viterbi decoding is built into the forward pass.
"""
from __future__ import annotations

from transformers.models.openai_privacy_filter.configuration_openai_privacy_filter import (
    OpenAIPrivacyFilterConfig,
)
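
# Consumer-side loading sketch for the auto_map dispatch described in the
# module docstring. The repo id below is an assumption (substitute the actual
# hub id); it is not defined by this config:
#
#     from transformers import AutoConfig, AutoModelForTokenClassification
#
#     config = AutoConfig.from_pretrained(
#         "fblgit/haremb-privacy-filter-opennemo", trust_remote_code=True
#     )
#     model = AutoModelForTokenClassification.from_pretrained(
#         "fblgit/haremb-privacy-filter-opennemo", trust_remote_code=True
#     )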


class HaremPiiConfig(OpenAIPrivacyFilterConfig):
    """
    HarEmb config. `model_type="haremb_pii"` disambiguates from upstream so
    AutoConfig + AutoModel mappings can target our subclasses without
    colliding with the registered OpenAIPrivacyFilterConfig entry.
    `modeling_haremb_pii` performs the auto-registration at import time.
    """

    model_type = "haremb_pii"
    def __init__(
        self,
        use_viterbi_decode: bool = True,
        viterbi_replace_logits: bool = True,
        **kwargs,
    ):
        super().__init__(**kwargs)
        # When True (and the model is in eval mode), HaremPiiForTokenClassification.forward
        # runs constrained BIOES Viterbi decoding over the logits and attaches
        # `predicted_labels` to the output. Set to False to skip Viterbi entirely.
        self.use_viterbi_decode = bool(use_viterbi_decode)
        # When True (and Viterbi is on), forward replaces `outputs.logits` with a
        # one-hot-shaped tensor whose argmax equals the Viterbi prediction. This
        # makes HF `pipeline()` and any naive `logits.argmax(-1)` consumer pick up
        # the Viterbi predictions automatically. The raw logits are preserved on
        # the output as `raw_logits`.
        self.viterbi_replace_logits = bool(viterbi_replace_logits)
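
# The class docstring notes that `modeling_haremb_pii` performs the Auto*
# registration at import time. A minimal sketch of what that presumably looks
# like (the exact call sites live in modeling_haremb_pii.py, not here):
#
#     from transformers import AutoConfig, AutoModelForTokenClassification
#
#     AutoConfig.register("haremb_pii", HaremPiiConfig)
#     AutoModelForTokenClassification.register(
#         HaremPiiConfig, HaremPiiForTokenClassification
#     )
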
__all__ = ["HaremPiiConfig"]
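

if __name__ == "__main__":
    # Smoke test, a sketch: it assumes OpenAIPrivacyFilterConfig is
    # instantiable with defaults. Exercises the two decode flags without
    # touching the hub.
    default_cfg = HaremPiiConfig()
    assert default_cfg.model_type == "haremb_pii"
    assert default_cfg.use_viterbi_decode
    assert default_cfg.viterbi_replace_logits

    # Keep Viterbi but leave `outputs.logits` untouched; consumers must then
    # read `predicted_labels` explicitly instead of relying on argmax.
    raw_cfg = HaremPiiConfig(viterbi_replace_logits=False)
    assert raw_cfg.use_viterbi_decode
    assert not raw_cfg.viterbi_replace_logits

    # Downstream expectation (per the flag comments in __init__, assuming the
    # modeling file behaves as described there): with both flags True and the
    # model in eval mode,
    #     outputs.logits.argmax(-1) == outputs.predicted_labels
    # while the unmodified scores remain available as `outputs.raw_logits`.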