"""
HaremPiiConfig — subclass of OpenAIPrivacyFilterConfig that:
* sets `model_type="haremb_pii"` (so AutoConfig + auto_map dispatch works
with `trust_remote_code=True`)
* paired with HaremPiiForTokenClassification in modeling_haremb_pii.py
via `auto_map`
This release is a 1-layer surgical slice of the OpenMed teacher:
* num_hidden_layers=1
* inference-only — Viterbi decoding is built into the forward pass.
"""
from __future__ import annotations

from transformers.models.openai_privacy_filter.configuration_openai_privacy_filter import (
    OpenAIPrivacyFilterConfig,
)
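
# Consumer-side loading sketch for the auto_map dispatch described in the
# module docstring. The repo id below is an assumption (substitute the actual
# hub id); it is not defined by this config:
#
#     from transformers import AutoConfig, AutoModelForTokenClassification
#
#     config = AutoConfig.from_pretrained(
#         "fblgit/haremb-privacy-filter-opennemo", trust_remote_code=True
#     )
#     model = AutoModelForTokenClassification.from_pretrained(
#         "fblgit/haremb-privacy-filter-opennemo", trust_remote_code=True
#     )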


class HaremPiiConfig(OpenAIPrivacyFilterConfig):
    """
    HarEmb config. `model_type="haremb_pii"` disambiguates from upstream so
    AutoConfig + AutoModel mappings can target our subclasses without
    colliding with the registered OpenAIPrivacyFilterConfig entry.
    `modeling_haremb_pii` performs the auto-registration at import time.
    """

    model_type = "haremb_pii"
    def __init__(
        self,
        use_viterbi_decode: bool = True,
        viterbi_replace_logits: bool = True,
        **kwargs,
    ):
        super().__init__(**kwargs)
        # When True (and the model is in eval mode), HaremPiiForTokenClassification.forward
        # runs constrained BIOES Viterbi decoding over the logits and attaches
        # `predicted_labels` to the output. Set to False to skip Viterbi entirely.
        self.use_viterbi_decode = bool(use_viterbi_decode)
        # When True (and Viterbi is on), forward replaces `outputs.logits` with a
        # one-hot-shaped tensor whose argmax equals the Viterbi prediction. This
        # makes HF `pipeline()` and any naive `logits.argmax(-1)` consumer pick up
        # the Viterbi predictions automatically. The raw logits are preserved on
        # the output as `raw_logits`.
        self.viterbi_replace_logits = bool(viterbi_replace_logits)
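
# The class docstring notes that `modeling_haremb_pii` performs the Auto*
# registration at import time. A minimal sketch of what that presumably looks
# like (the exact call sites live in modeling_haremb_pii.py, not here):
#
#     from transformers import AutoConfig, AutoModelForTokenClassification
#
#     AutoConfig.register("haremb_pii", HaremPiiConfig)
#     AutoModelForTokenClassification.register(
#         HaremPiiConfig, HaremPiiForTokenClassification
#     )
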
__all__ = ["HaremPiiConfig"]
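

if __name__ == "__main__":
    # Smoke test, a sketch: it assumes OpenAIPrivacyFilterConfig is
    # instantiable with defaults. Exercises the two decode flags without
    # touching the hub.
    default_cfg = HaremPiiConfig()
    assert default_cfg.model_type == "haremb_pii"
    assert default_cfg.use_viterbi_decode
    assert default_cfg.viterbi_replace_logits

    # Keep Viterbi but leave `outputs.logits` untouched; consumers must then
    # read `predicted_labels` explicitly instead of relying on argmax.
    raw_cfg = HaremPiiConfig(viterbi_replace_logits=False)
    assert raw_cfg.use_viterbi_decode
    assert not raw_cfg.viterbi_replace_logits

    # Downstream expectation (per the flag comments in __init__, assuming the
    # modeling file behaves as described there): with both flags True and the
    # model in eval mode,
    #     outputs.logits.argmax(-1) == outputs.predicted_labels
    # while the unmodified scores remain available as `outputs.raw_logits`.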