""" HaremPiiConfig — subclass of OpenAIPrivacyFilterConfig that: * sets `model_type="haremb_pii"` (so AutoConfig + auto_map dispatch works with `trust_remote_code=True`) * paired with HaremPiiForTokenClassification in modeling_haremb_pii.py via `auto_map` This release is a 1-layer surgical slice of the OpenMed teacher: * num_hidden_layers=1 * inference-only — Viterbi decoding is built into the forward pass. """ from __future__ import annotations from transformers.models.openai_privacy_filter.configuration_openai_privacy_filter import ( OpenAIPrivacyFilterConfig, ) class HaremPiiConfig(OpenAIPrivacyFilterConfig): """ HarEmb config. `model_type="haremb_pii"` disambiguates from upstream so AutoConfig + AutoModel mappings can target our subclasses without colliding with the registered OpenAIPrivacyFilterConfig entry. `modeling_haremb_pii` performs the auto-registration at import time. """ model_type = "haremb_pii" def __init__( self, use_viterbi_decode: bool = True, viterbi_replace_logits: bool = True, **kwargs, ): super().__init__(**kwargs) # When True (and model is in eval mode), HaremPiiForTokenClassification.forward # runs constrained BIOES Viterbi over logits and attaches `predicted_labels` # to the output. Set to False to skip Viterbi entirely. self.use_viterbi_decode = bool(use_viterbi_decode) # When True (and Viterbi is on), forward replaces `outputs.logits` with a # one-hot-shaped tensor whose argmax equals the Viterbi prediction. This # makes HF `pipeline()` and any naive `logits.argmax(-1)` consumer use # Viterbi predictions automatically. The raw logits are preserved on # the output as `raw_logits`. self.viterbi_replace_logits = bool(viterbi_replace_logits) __all__ = ["HaremPiiConfig"]