| """ |
| Hugging Face configuration for the Sentiment Transformer. |
| |
| This file is **self-contained** — it has no dependency on the project's |
| ``config.py`` or ``config.toml``. It is copied verbatim into every HF |
| export directory so that ``AutoConfig.from_pretrained()`` works with |
| ``trust_remote_code=True``. |
| """ |
|
|
| from __future__ import annotations |
|
|
| from transformers import PretrainedConfig |
|
|
|
|
class SentimentTransformerConfig(PretrainedConfig):
    """HuggingFace-compatible configuration for the custom sentiment
    transformer encoder classifier.

    This maps the project's internal hyperparameter names to the
    canonical HF field names used by ``AutoConfig`` / ``AutoModel``.

    Attributes
    ----------
    vocab_size : int
        Size of the BPE vocabulary.
    hidden_size : int
        Embedding / hidden dimension of the transformer.
    intermediate_size : int
        Inner (expanded) dimension of the position-wise FFN.
    num_hidden_layers : int
        Number of stacked transformer encoder blocks.
    num_attention_heads : int
        Number of parallel attention heads.
    max_position_embeddings : int
        Maximum supported input sequence length.
    hidden_dropout_prob : float
        Dropout probability used throughout the model.
    num_labels : int
        Number of output classes (2 for binary, 3 for ternary, etc.).

    Raises
    ------
    ValueError
        If ``hidden_size`` is not evenly divisible by
        ``num_attention_heads`` (multi-head attention requires an
        integer per-head dimension).
    """

    model_type = "sentiment-transformer"

    def __init__(
        self,
        vocab_size: int = 16_000,
        hidden_size: int = 256,
        intermediate_size: int = 1024,
        num_hidden_layers: int = 6,
        num_attention_heads: int = 8,
        max_position_embeddings: int = 256,
        hidden_dropout_prob: float = 0.1,
        num_labels: int = 2,
        pad_token_id: int = 0,
        id2label: dict[int, str] | None = None,
        label2id: dict[str, int] | None = None,
        **kwargs,
    ) -> None:
        # Fail fast on an invalid head configuration; otherwise the
        # mismatch only surfaces deep inside the attention module.
        if hidden_size % num_attention_heads != 0:
            raise ValueError(
                f"hidden_size ({hidden_size}) must be divisible by "
                f"num_attention_heads ({num_attention_heads})"
            )

        # Keep num_labels consistent with an explicit label mapping.
        # Prefer id2label; fall back to label2id when only the inverse
        # mapping was supplied (previously that case was silently
        # ignored, leaving num_labels wrong for the classifier head).
        if id2label is not None and len(id2label) != num_labels:
            num_labels = len(id2label)
        elif label2id is not None and len(label2id) != num_labels:
            num_labels = len(label2id)

        # Sentiment classification is single-label by construction, but
        # let callers override problem_type explicitly via kwargs.
        kwargs.setdefault("problem_type", "single_label_classification")
        super().__init__(
            pad_token_id=pad_token_id,
            num_labels=num_labels,
            id2label=id2label,
            label2id=label2id,
            **kwargs,
        )
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.max_position_embeddings = max_position_embeddings
        self.hidden_dropout_prob = hidden_dropout_prob
|
|