"""
Hugging Face configuration for the Sentiment Transformer.

This file is **self-contained** — it has no dependency on the project's
``config.py`` or ``config.toml``.  It is copied verbatim into every HF
export directory so that ``AutoConfig.from_pretrained()`` works with
``trust_remote_code=True``.
"""

from __future__ import annotations

from transformers import PretrainedConfig


class SentimentTransformerConfig(PretrainedConfig):
    """HuggingFace-compatible configuration for the custom sentiment
    transformer encoder classifier.

    This maps the project's internal hyperparameter names to the
    canonical HF field names used by ``AutoConfig`` / ``AutoModel``.

    Attributes
    ----------
    vocab_size : int
        Size of the BPE vocabulary.
    hidden_size : int
        Embedding / hidden dimension of the transformer.
    intermediate_size : int
        Inner (expanded) dimension of the position-wise FFN.
    num_hidden_layers : int
        Number of stacked transformer encoder blocks.
    num_attention_heads : int
        Number of parallel attention heads.
    max_position_embeddings : int
        Maximum supported input sequence length.
    hidden_dropout_prob : float
        Dropout probability used throughout the model.
    num_labels : int
        Number of output classes (2 for binary, 3 for ternary, etc.).
    """

    model_type = "sentiment-transformer"

    def __init__(
        self,
        vocab_size: int = 16_000,
        hidden_size: int = 256,
        intermediate_size: int = 1024,
        num_hidden_layers: int = 6,
        num_attention_heads: int = 8,
        max_position_embeddings: int = 256,
        hidden_dropout_prob: float = 0.1,
        num_labels: int = 2,
        pad_token_id: int = 0,
        id2label: dict[int, str] | None = None,
        label2id: dict[str, int] | None = None,
        **kwargs,
    ) -> None:
        # When loading from a serialized config.json, `id2label` and
        # `num_labels` may both be present.  HF's PreTrainedConfig sets
        # ``num_labels = 2`` as a hidden default, which overrides the
        # id2label we saved.  Reconcile by deriving from id2label.
        if id2label is not None and len(id2label) != num_labels:
            num_labels = len(id2label)

        # `problem_type` may already be present in kwargs when loading from
        # a serialized config.json — use setdefault to avoid duplicate kwarg.
        kwargs.setdefault("problem_type", "single_label_classification")
        super().__init__(
            pad_token_id=pad_token_id,
            num_labels=num_labels,
            id2label=id2label,
            label2id=label2id,
            **kwargs,
        )
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.max_position_embeddings = max_position_embeddings
        self.hidden_dropout_prob = hidden_dropout_prob