Instructions to use FrontiersMind/Nandi-Mini-150M with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use FrontiersMind/Nandi-Mini-150M with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="FrontiersMind/Nandi-Mini-150M", trust_remote_code=True)

# Load model directly
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("FrontiersMind/Nandi-Mini-150M", trust_remote_code=True, dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use FrontiersMind/Nandi-Mini-150M with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "FrontiersMind/Nandi-Mini-150M"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "FrontiersMind/Nandi-Mini-150M",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker
docker model run hf.co/FrontiersMind/Nandi-Mini-150M
- SGLang
How to use FrontiersMind/Nandi-Mini-150M with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "FrontiersMind/Nandi-Mini-150M" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "FrontiersMind/Nandi-Mini-150M",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "FrontiersMind/Nandi-Mini-150M" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "FrontiersMind/Nandi-Mini-150M",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
- Docker Model Runner
How to use FrontiersMind/Nandi-Mini-150M with Docker Model Runner:
docker model run hf.co/FrontiersMind/Nandi-Mini-150M
| # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. | |
| # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| """Configuration base class and utilities.""" | |
| import copy | |
| import json | |
| import math | |
| import os | |
| from collections.abc import Sequence | |
| from dataclasses import dataclass | |
| from typing import TYPE_CHECKING, Any, ClassVar, Literal, TypeVar, Union | |
| from huggingface_hub import create_repo | |
| from huggingface_hub.dataclasses import strict | |
| from packaging import version | |
| from . import __version__ | |
| from .dynamic_module_utils import custom_object_save | |
| from .generation.configuration_utils import GenerationConfig | |
| from .modeling_gguf_pytorch_utils import load_gguf_checkpoint | |
| from .modeling_rope_utils import RotaryEmbeddingConfigMixin | |
| from .tokenization_utils_base import PreTrainedTokenizerBase | |
| from .utils import ( | |
| CONFIG_NAME, | |
| PushToHubMixin, | |
| cached_file, | |
| copy_func, | |
| extract_commit_hash, | |
| is_torch_available, | |
| logging, | |
| ) | |
| from .utils.generic import is_timm_config_dict | |
| if TYPE_CHECKING: | |
| import torch | |
# Module-level logger used by the configuration helpers below.
logger = logging.get_logger(__name__)

# Type variable bound to `PreTrainedConfig`, used to annotate constructors that return
# an instance of the class they are called on.
SpecificPreTrainedConfigType = TypeVar("SpecificPreTrainedConfigType", bound="PreTrainedConfig")

# Tag key and lookup table for the three non-finite float values.
# NOTE(review): presumably used when (de)serializing non-finite floats; the call sites
# are outside this part of the file -- confirm.
_FLOAT_TAG_KEY = "__float__"
_FLOAT_TAG_VALUES = {
    "Infinity": math.inf,
    "-Infinity": -math.inf,
    "NaN": math.nan,
}

# Every value accepted inside a config's `layer_types` list (checked by `validate_layer_type`).
ALLOWED_LAYER_TYPES = (
    "full_attention",
    "sliding_attention",
    "chunked_attention",
    "linear_attention",  # used in minimax
    "conv",  # used in LFMv2
    "mamba",
    "attention",
    "sparse",
    "dense",
)
| class PreTrainedConfig(PushToHubMixin, RotaryEmbeddingConfigMixin): | |
| # no-format | |
| r""" | |
| Base class for all configuration classes. Handles a few parameters common to all models' configurations as well as | |
| methods for loading/downloading/saving configurations. | |
| <Tip> | |
| A configuration file can be loaded and saved to disk. Loading the configuration file and using this file to | |
| initialize a model does **not** load the model weights. It only affects the model's configuration. | |
| </Tip> | |
| Class attributes (overridden by derived classes): | |
| - **model_type** (`str`) -- An identifier for the model type, serialized into the JSON file, and used to recreate | |
| the correct object in [`~transformers.AutoConfig`]. | |
| - **has_no_defaults_at_init** (`bool`) -- Whether the config class can be initialized without providing input arguments. | |
| Some configurations require inputs to be defined at init and have no default values, usually these are composite configs, | |
| (but not necessarily) such as [`~transformers.EncoderDecoderConfig`] or [`~RagConfig`]. They have to be initialized from | |
| two or more configs of type [`~transformers.PreTrainedConfig`]. | |
| - **keys_to_ignore_at_inference** (`list[str]`) -- A list of keys to ignore by default when looking at dictionary | |
| outputs of the model during inference. | |
| - **attribute_map** (`dict[str, str]`) -- A dict that maps model specific attribute names to the standardized | |
| naming of attributes. | |
| - **base_model_tp_plan** (`dict[str, Any]`) -- A dict that maps sub-modules FQNs of a base model to a tensor | |
| parallel plan applied to the sub-module when `model.tensor_parallel` is called. | |
| - **base_model_pp_plan** (`dict[str, tuple[list[str]]]`) -- A dict that maps child-modules of a base model to a | |
| pipeline parallel plan that enables users to place the child-module on the appropriate device. | |
| Common attributes (present in all subclasses): | |
| - **vocab_size** (`int`) -- The number of tokens in the vocabulary, which is also the first dimension of the | |
| embeddings matrix (this attribute may be missing for models that don't have a text modality like ViT). | |
| - **hidden_size** (`int`) -- The hidden size of the model. | |
| - **num_attention_heads** (`int`) -- The number of attention heads used in the multi-head attention layers of the | |
| model. | |
| - **num_hidden_layers** (`int`) -- The number of blocks in the model. | |
| <Tip warning={true}> | |
| Setting parameters for sequence generation in the model config is deprecated. For backward compatibility, loading | |
| some of them will still be possible, but attempting to overwrite them will throw an exception -- you should set | |
| them in a [~transformers.GenerationConfig]. Check the documentation of [~transformers.GenerationConfig] for more | |
| information about the individual parameters. | |
| </Tip> | |
| Args: | |
| name_or_path (`str`, *optional*, defaults to `""`): | |
| Store the string that was passed to [`PreTrainedModel.from_pretrained`] as `pretrained_model_name_or_path` | |
| if the configuration was created with such a method. | |
| output_hidden_states (`bool`, *optional*, defaults to `False`): | |
| Whether or not the model should return all hidden-states. | |
| output_attentions (`bool`, *optional*, defaults to `False`): | |
| Whether or not the model should return all attentions. | |
| return_dict (`bool`, *optional*, defaults to `True`): | |
| Whether or not the model should return a [`~transformers.utils.ModelOutput`] instead of a plain tuple. | |
| is_encoder_decoder (`bool`, *optional*, defaults to `False`): | |
| Whether the model is used as an encoder/decoder or not. | |
| chunk_size_feed_forward (`int`, *optional*, defaults to `0`): | |
| The chunk size of all feed forward layers in the residual attention blocks. A chunk size of `0` means that | |
| the feed forward layer is not chunked. A chunk size of n means that the feed forward layer processes `n` < | |
| sequence_length embeddings at a time. For more information on feed forward chunking, see [How does Feed | |
| Forward Chunking work?](../glossary.html#feed-forward-chunking). | |
| > Parameters for fine-tuning tasks | |
| architectures (`list[str]`, *optional*): | |
| Model architectures that can be used with the model pretrained weights. | |
| id2label (`dict[int, str]`, *optional*): | |
| A map from index (for instance prediction index, or target index) to label. | |
| label2id (`dict[str, int]`, *optional*): | |
| A map from label to index for the model. | |
| num_labels (`int`, *optional*): | |
| Number of labels to use in the last layer added to the model, typically for a classification task. | |
| problem_type (`str`, *optional*): | |
| Problem type for `XxxForSequenceClassification` models. Can be one of `"regression"`, | |
| `"single_label_classification"` or `"multi_label_classification"`. | |
| > PyTorch specific parameters | |
| dtype (`str`, *optional*): | |
| The `dtype` of the weights. This attribute can be used to initialize the model to a non-default `dtype` | |
| (which is normally `float32`) and thus allow for optimal storage allocation. For example, if the saved | |
| model is `float16`, ideally we want to load it back using the minimal amount of memory needed to load | |
| `float16` weights. | |
| """ | |
| # Class attributes that we don't want to save or have in `self.__dict__` | |
| # They are not supposed to be set/changed by users. Each field is set when | |
| # creating a model class | |
| base_config_key: ClassVar[str] = "" | |
| sub_configs: ClassVar[dict[str, type["PreTrainedConfig"]]] = {} | |
| has_no_defaults_at_init: ClassVar[bool] = False | |
| keys_to_ignore_at_inference: ClassVar[list[str]] = [] | |
| attribute_map: ClassVar[dict[str, str]] = {} | |
| base_model_tp_plan: ClassVar[dict[str, Any] | None] = None | |
| base_model_pp_plan: ClassVar[dict[str, Sequence[list[str]]] | None] = None | |
| base_model_ep_plan: ClassVar[dict[str, Sequence[list[str]]] | None] = None | |
| _auto_class: ClassVar[str | None] = None | |
| # Attributes set internally when saving and used to infer model | |
| # class for `Auto` mapping | |
| model_type: ClassVar[str] = "" | |
| transformers_version: str | None = None | |
| architectures: list[str] | None = None | |
| # Common attributes for all models | |
| output_hidden_states: bool | None = False | |
| return_dict: bool | None = True | |
| dtype: Union[str, "torch.dtype"] | None = None | |
| chunk_size_feed_forward: int = 0 | |
| is_encoder_decoder: bool = False | |
| # Fine-tuning task arguments | |
| id2label: dict[int, str] | dict[str, str] | None = None | |
| label2id: dict[str, int] | dict[str, str] | None = None | |
| problem_type: Literal["regression", "single_label_classification", "multi_label_classification"] | None = None | |
| # Tokenizer kwargs | |
| tokenizer_class: str | PreTrainedTokenizerBase | None = None | |
def __post_init__(self, **kwargs):
    """
    Finish initialization once the dataclass fields have been assigned.

    Consumes the backward-compatibility and private entries of `kwargs` (`torch_dtype`, `num_labels`,
    legacy rope keys, generation parameters, `name_or_path`, `_commit_hash`, `output_attentions`,
    `attn_implementation`, `experts_implementation`) and sets every remaining entry as an attribute.

    Args:
        kwargs (`dict[str, Any]`, *optional*):
            Extra values that are not declared as dataclass fields.

    Raises:
        `AttributeError`: If one of the remaining `kwargs` cannot be set on the config (logged, then re-raised).
    """
    # BC for the `torch_dtype` argument instead of the simpler `dtype`.
    # No warning here: most configs on the hub still carry `torch_dtype`, so it would always fire.
    legacy_dtype = kwargs.pop("torch_dtype", None)
    if legacy_dtype is not None:
        # When both are given, `dtype` wins
        self.dtype = legacy_dtype if self.dtype is None else self.dtype

    if isinstance(self.dtype, str) and is_torch_available():
        # To be consistent with `from_pretrained`'s dtype argument, turn the string
        # into the matching `torch.dtype` object
        import torch

        self.dtype = getattr(torch, self.dtype)

    # Keep the default value of `num_labels=2` in case users have saved a classifier with 2 labels.
    # Configs previously did not save `id2label` for 2 labels because it is the default. In all other
    # cases the config dict is expected to have an `id2label` field if it is a classification model.
    if self.id2label is None:
        self.num_labels = kwargs.get("num_labels", 2)
    else:
        requested_num_labels = kwargs.get("num_labels")
        if requested_num_labels is not None and len(self.id2label) != requested_num_labels:
            logger.warning(
                f"You passed `num_labels={kwargs.get('num_labels')}` which is incompatible to "
                f"the `id2label` map of length `{len(self.id2label)}`."
            )
        # JSON keys are always strings, so cast the ids back to int
        self.id2label = {int(label_id): label for label_id, label in self.id2label.items()}

    # BC for rotary embeddings: legacy keys are taken out of kwargs and renamed to the new format
    if hasattr(self, "rope_parameters"):
        kwargs = self.convert_rope_params_to_dict(**kwargs)

    # Sequence-generation parameters stored in the config are dropped rather than loaded
    for generation_key in GenerationConfig._get_default_generation_params():
        kwargs.pop(generation_key, None)

    # Name or path to the pretrained checkpoint
    self._name_or_path = str(kwargs.pop("name_or_path", ""))
    self._commit_hash = kwargs.pop("_commit_hash", None)

    # Attention/Experts implementation to use, if relevant (set recursively on sub-configs)
    self._output_attentions: bool | None = kwargs.pop("output_attentions", False)
    self._attn_implementation: str | None = kwargs.pop("attn_implementation", None)
    self._experts_implementation: str | None = kwargs.pop("experts_implementation", None)

    # Whatever is left becomes a plain attribute. The two internal fields are skipped so that
    # problematic values stored in hub configs are not deserialized -- the public field is the entry point.
    internal_only = ("_attn_implementation_internal", "_experts_implementation_internal")
    for key, value in kwargs.items():
        if key in internal_only:
            continue
        try:
            setattr(self, key, value)
        except AttributeError as err:
            logger.error(f"Can't set {key} with value {value} for {self}")
            raise err
def __init_subclass__(cls, *args, **kwargs):
    """Run the regular subclass hook, then apply `dataclass` (with `repr=False`) to the new subclass."""
    super().__init_subclass__(*args, **kwargs)
    as_dataclass = dataclass(repr=False)
    cls = as_dataclass(cls)
| def name_or_path(self) -> str | None: | |
| return getattr(self, "_name_or_path", None) | |
| def name_or_path(self, value): | |
| self._name_or_path = str(value) # Make sure that name_or_path is a string (for JSON encoding) | |
| def num_labels(self) -> int: | |
| """ | |
| `int`: The number of labels for classification models. | |
| """ | |
| return len(self.id2label) if self.id2label is not None else None | |
| def num_labels(self, num_labels: int): | |
| # we do not store `num_labels` attribute in config, but instead | |
| # compute it based on the length of the `id2label` map | |
| if self.id2label is None or self.num_labels != num_labels: | |
| self.id2label = {i: f"LABEL_{i}" for i in range(num_labels)} | |
| self.label2id = dict(zip(self.id2label.values(), self.id2label.keys())) | |
@property
def output_attentions(self):
    """
    `bool`: Whether or not the model should return all attentions.
    """
    return self._output_attentions

@output_attentions.setter
def output_attentions(self, value: bool):
    # If `output_attentions` is enabled before any attention implementation was chosen, dispatch eager
    if value and self._attn_implementation is None:
        self._attn_implementation = "eager"
    # Enabling it with any other implementation is rejected
    if value and self._attn_implementation != "eager":
        raise ValueError(
            "The `output_attentions` attribute is not supported when using the `attn_implementation` set to "
            f"{self._attn_implementation}. Please set it to 'eager' instead."
        )
    self._output_attentions = value
| def _attn_implementation(self): | |
| return self._attn_implementation_internal | |
| def _attn_implementation(self, value: str | dict | None): | |
| """We set it recursively on the sub-configs as well""" | |
| # Set if for current config | |
| current_attn = getattr(self, "_attn_implementation", None) | |
| attn_implementation = value if not isinstance(value, dict) else value.get("", current_attn) | |
| self._attn_implementation_internal = attn_implementation | |
| # Set it recursively on the subconfigs | |
| for subconfig_key in self.sub_configs: | |
| subconfig = getattr(self, subconfig_key, None) | |
| if subconfig is not None: | |
| current_subconfig_attn = getattr(subconfig, "_attn_implementation", None) | |
| sub_implementation = ( | |
| value if not isinstance(value, dict) else value.get(subconfig_key, current_subconfig_attn) | |
| ) | |
| subconfig._attn_implementation = sub_implementation | |
| def _experts_implementation(self): | |
| return self._experts_implementation_internal | |
| def _experts_implementation(self, value: str | dict | None): | |
| """We set it recursively on the sub-configs as well""" | |
| # Set if for current config | |
| current_moe = getattr(self, "_experts_implementation", None) | |
| experts_implementation = value if not isinstance(value, dict) else value.get("", current_moe) | |
| self._experts_implementation_internal = experts_implementation | |
| # Set it recursively on the subconfigs | |
| for subconfig_key in self.sub_configs: | |
| subconfig = getattr(self, subconfig_key, None) | |
| if subconfig is not None: | |
| current_subconfig_moe = getattr(subconfig, "_experts_implementation", None) | |
| sub_implementation = ( | |
| value if not isinstance(value, dict) else value.get(subconfig_key, current_subconfig_moe) | |
| ) | |
| subconfig._experts_implementation = sub_implementation | |
@property
def torch_dtype(self):
    """Deprecated alias of `dtype`: logs a one-time deprecation warning and returns `self.dtype`."""
    logger.warning_once("`torch_dtype` is deprecated! Use `dtype` instead!")
    return self.dtype

@torch_dtype.setter
def torch_dtype(self, value):
    # Deprecated write access: warn once and forward the value to `dtype`
    logger.warning_once("`torch_dtype` is deprecated! Use `dtype` instead!")
    self.dtype = value

@property
def use_return_dict(self):
    """Deprecated alias of `return_dict`: logs a one-time deprecation warning and returns `self.return_dict`."""
    logger.warning_once("`use_return_dict` is deprecated! Use `return_dict` instead!")
    return self.return_dict
def __setattr__(self, key, value):
    """Set an attribute, first translating `key` through `attribute_map` when it is listed there."""
    aliases = super().__getattribute__("attribute_map")
    target = aliases[key] if key in aliases else key
    super().__setattr__(target, value)
def __getattribute__(self, key):
    """Read an attribute, first translating `key` through `attribute_map` when it is listed there."""
    # `attribute_map` itself is never translated; every other name goes through the map first
    if key != "attribute_map":
        aliases = super().__getattribute__("attribute_map")
        if key in aliases:
            key = aliases[key]
    return super().__getattribute__(key)
def validate_output_attentions(self):
    """
    Check that `output_attentions` is only enabled with an attention implementation of `"eager"` or `None`.

    Raises:
        `ValueError`: If `output_attentions` is truthy and `_attn_implementation` is anything else.
    """
    if not self.output_attentions:
        return
    if self._attn_implementation in ("eager", None):
        return
    raise ValueError(
        "The `output_attentions` attribute is not supported when using the `attn_implementation` set to "
        f"{self._attn_implementation}. Please set it to 'eager' instead."
    )
def validate_architecture(self):
    """
    Part of `@strict`-powered validation. Validates the architecture of the config.

    The check only applies when the config defines `head_dim`, `num_heads` and `embed_dim`.

    Raises:
        `ValueError`: If `head_dim * num_heads` differs from `embed_dim`.
    """
    needed = ("head_dim", "num_heads", "embed_dim")
    if not all(hasattr(self, name) for name in needed):
        return
    if self.head_dim * self.num_heads != self.embed_dim:
        raise ValueError(
            f"The embed_dim ({self.embed_dim}) is not a multiple of the number of attention "
            f"heads ({self.num_heads})."
        )
def validate_token_ids(self):
    """
    Part of `@strict`-powered validation. Validates the contents of the special tokens.

    Every attribute of the text config whose name ends in `_token_id` and whose value is an integer
    must lie in `[0, vocab_size)`. Violations are only logged as warnings, never raised. Nothing is
    checked when the text config has no `vocab_size`.
    """
    text_config = self.get_text_config(decoder=True)
    vocab_size = getattr(text_config, "vocab_size", None)
    if vocab_size is None:
        return

    # Check all special tokens, e.g. pad_token_id, image_token_id, audio_token_id.
    # The attribute *name* selects the candidates and the attribute *value* is range-checked;
    # testing one variable for both (as before) made the condition impossible to satisfy.
    for key, value in vars(text_config).items():
        if not key.endswith("_token_id"):
            continue
        if isinstance(value, int) and not 0 <= value < vocab_size:
            # Can't be an exception until we can load configs that fail validation: several configs on the Hub
            # store invalid special tokens, e.g. `pad_token_id=-1`
            logger.warning_once(
                f"Model config: {key} must be `None` or an integer within the vocabulary (between 0 "
                f"and {vocab_size - 1}), got {value}. This may result in unexpected behavior."
            )
def validate_layer_type(self):
    """
    Check that `layer_types` is correctly defined.

    Nothing is checked unless `layer_types` is set (not `None`) and the config has a
    `num_hidden_layers` attribute.

    Raises:
        `ValueError`: If an entry is not in `ALLOWED_LAYER_TYPES`, or if `num_hidden_layers` is not
            `None` and differs from the number of entries.
    """
    layer_types = getattr(self, "layer_types", None)
    if layer_types is None or not hasattr(self, "num_hidden_layers"):
        return

    if any(layer_type not in ALLOWED_LAYER_TYPES for layer_type in layer_types):
        raise ValueError(f"The `layer_types` entries must be in {ALLOWED_LAYER_TYPES} but got {layer_types}")

    expected_layers = self.num_hidden_layers
    if expected_layers is not None and expected_layers != len(layer_types):
        raise ValueError(
            f"`num_hidden_layers` ({expected_layers}) must be equal to the number of layer types "
            f"({len(layer_types)})"
        )
@property
def rope_scaling(self):
    """Alias of `rope_parameters`: reading `rope_scaling` returns `self.rope_parameters`."""
    return self.rope_parameters

@rope_scaling.setter
def rope_scaling(self, value):
    # Writing through the alias stores the value in `rope_parameters`
    self.rope_parameters = value
def save_pretrained(self, save_directory: str | os.PathLike, push_to_hub: bool = False, **kwargs):
    """
    Save a configuration object to the directory `save_directory`, so that it can be re-loaded using the
    [`~PreTrainedConfig.from_pretrained`] class method.

    Args:
        save_directory (`str` or `os.PathLike`):
            Directory where the configuration JSON file will be saved (will be created if it does not exist).
        push_to_hub (`bool`, *optional*, defaults to `False`):
            Whether or not to push your model to the Hugging Face model hub after saving it. You can specify the
            repository you want to push to with `repo_id` (will default to the name of `save_directory` in your
            namespace).
        kwargs (`dict[str, Any]`, *optional*):
            Additional key word arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.

    Raises:
        `AssertionError`: If `save_directory` points to an existing file.
        `ValueError`: If generation parameters are set on the model config.
    """
    if os.path.isfile(save_directory):
        raise AssertionError(f"Provided path ({save_directory}) should be a directory, not a file")

    generation_parameters = self._get_generation_parameters()
    if len(generation_parameters) > 0:
        raise ValueError(
            "Some generation parameters are set in the model config. These should go into `model.generation_config`"
            f"as opposed to `model.config`. \nGeneration parameters found: {str(generation_parameters)}",
        )

    os.makedirs(save_directory, exist_ok=True)

    if push_to_hub:
        commit_message = kwargs.pop("commit_message", None)
        # `os.fspath` makes the default repo name work for `os.PathLike` inputs too: calling
        # `.split` directly on a `pathlib.Path` raises `AttributeError`.
        default_repo_id = os.fspath(save_directory).split(os.path.sep)[-1]
        repo_id = kwargs.pop("repo_id", default_repo_id)
        repo_id = create_repo(repo_id, exist_ok=True, **kwargs).repo_id
        files_timestamps = self._get_files_timestamps(save_directory)

    # This attribute is important to know on load, but should not be serialized on save.
    if "transformers_weights" in self:
        delattr(self, "transformers_weights")

    # If we have a custom config, we copy the file defining it in the folder and set the attributes so it can be
    # loaded from the Hub.
    if self._auto_class is not None:
        custom_object_save(self, save_directory, config=self)

    # If we save using the predefined names, we can load using `from_pretrained`
    output_config_file = os.path.join(save_directory, CONFIG_NAME)

    # Strict validation at save-time: prevent bad patterns from propagating.
    # The `strict` decorator guarantees that `self.validate` exists, but not every
    # model config might have the decorator added
    if hasattr(self, "validate"):
        self.validate()

    self.to_json_file(output_config_file, use_diff=True)
    logger.info(f"Configuration saved in {output_config_file}")

    if push_to_hub:
        self._upload_modified_files(
            save_directory,
            repo_id,
            files_timestamps,
            commit_message=commit_message,
            token=kwargs.get("token"),
        )
@classmethod
def from_pretrained(
    cls: type[SpecificPreTrainedConfigType],
    pretrained_model_name_or_path: str | os.PathLike,
    cache_dir: str | os.PathLike | None = None,
    force_download: bool = False,
    local_files_only: bool = False,
    token: str | bool | None = None,
    revision: str = "main",
    **kwargs,
) -> SpecificPreTrainedConfigType:
    r"""
    Instantiate a [`PreTrainedConfig`] (or a derived class) from a pretrained model configuration.

    Args:
        pretrained_model_name_or_path (`str` or `os.PathLike`):
            This can be either:

            - a string, the *model id* of a pretrained model configuration hosted inside a model repo on
              huggingface.co.
            - a path to a *directory* containing a configuration file saved using the
              [`~PreTrainedConfig.save_pretrained`] method, e.g., `./my_model_directory/`.
            - a path to a saved configuration JSON *file*, e.g., `./my_model_directory/configuration.json`.
        cache_dir (`str` or `os.PathLike`, *optional*):
            Path to a directory in which a downloaded pretrained model configuration should be cached if the
            standard cache should not be used.
        force_download (`bool`, *optional*, defaults to `False`):
            Whether or not to force to (re-)download the configuration files and override the cached versions if
            they exist.
        local_files_only (`bool`, *optional*, defaults to `False`):
            Forwarded as `local_files_only` to the configuration file lookup.
        proxies (`dict[str, str]`, *optional*):
            A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
            'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
        token (`str` or `bool`, *optional*):
            The token to use as HTTP bearer authorization for remote files. If `True`, or not specified, will use
            the token generated when running `hf auth login` (stored in `~/.huggingface`).
        revision (`str`, *optional*, defaults to `"main"`):
            The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
            git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
            identifier allowed by git.

            <Tip>

            To test a pull request you made on the Hub, you can pass `revision="refs/pr/<pr_number>"`.

            </Tip>

        return_unused_kwargs (`bool`, *optional*, defaults to `False`):
            If `False`, then this function returns just the final configuration object.

            If `True`, then this functions returns a `Tuple(config, unused_kwargs)` where *unused_kwargs* is a
            dictionary consisting of the key/value pairs whose keys are not configuration attributes: i.e., the
            part of `kwargs` which has not been used to update `config` and is otherwise ignored.
        subfolder (`str`, *optional*, defaults to `""`):
            In case the relevant files are located inside a subfolder of the model repo on huggingface.co, you can
            specify the folder name here.
        kwargs (`dict[str, Any]`, *optional*):
            The values in kwargs of any keys which are configuration attributes will be used to override the loaded
            values. Behavior concerning key/value pairs whose keys are *not* configuration attributes is controlled
            by the `return_unused_kwargs` keyword parameter.

    Returns:
        [`PreTrainedConfig`]: The configuration object instantiated from this pretrained model.

    Examples:

    ```python
    # We can't instantiate directly the base class *PreTrainedConfig* so let's show the examples on a
    # derived class: BertConfig
    config = BertConfig.from_pretrained(
        "google-bert/bert-base-uncased"
    )  # Download configuration from huggingface.co and cache.
    config = BertConfig.from_pretrained(
        "./test/saved_model/"
    )  # E.g. config (or model) was saved using *save_pretrained('./test/saved_model/')*
    config = BertConfig.from_pretrained("./test/saved_model/my_configuration.json")
    config = BertConfig.from_pretrained("google-bert/bert-base-uncased", output_attentions=True, foo=False)
    assert config.output_attentions == True
    config, unused_kwargs = BertConfig.from_pretrained(
        "google-bert/bert-base-uncased", output_attentions=True, foo=False, return_unused_kwargs=True
    )
    assert config.output_attentions == True
    assert unused_kwargs == {"foo": False}
    ```"""
    # The explicit keyword arguments travel to the config lookup through `kwargs`
    kwargs["cache_dir"] = cache_dir
    kwargs["force_download"] = force_download
    kwargs["local_files_only"] = local_files_only
    kwargs["revision"] = revision
    # `token` has to be forwarded as well: `_get_config_dict` reads it from `kwargs`, so without
    # this line an explicitly passed token is silently dropped.
    kwargs["token"] = token

    config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
    if cls.base_config_key and cls.base_config_key in config_dict:
        config_dict = config_dict[cls.base_config_key]

    if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
        # sometimes the config has no `base_config_key` if the config is used in several composite models
        # e.g. LlamaConfig. In that case we try to see if there is match in `model_type` before raising a warning
        for v in config_dict.values():
            if isinstance(v, dict) and v.get("model_type") == cls.model_type:
                config_dict = v

        # raise warning only if we still can't see a match in `model_type`
        if config_dict["model_type"] != cls.model_type:
            logger.warning(
                f"You are using a model of type `{config_dict['model_type']}` to instantiate a model of type "
                f"`{cls.model_type}`. This may be expected if you are loading a checkpoint that shares a subset "
                f"of the architecture (e.g., loading a `sam2_video` checkpoint into `Sam2Model`), but is otherwise "
                f"not supported and can yield errors. Please verify that the checkpoint is compatible with the "
                f"model you are instantiating."
            )

    return cls.from_dict(config_dict, **kwargs)
@classmethod
def get_config_dict(
    cls, pretrained_model_name_or_path: str | os.PathLike, **kwargs
) -> tuple[dict[str, Any], dict[str, Any]]:
    """
    From a `pretrained_model_name_or_path`, resolve to a dictionary of parameters, to be used for instantiating a
    [`PreTrainedConfig`] using `from_dict`.

    Parameters:
        pretrained_model_name_or_path (`str` or `os.PathLike`):
            The identifier of the pre-trained checkpoint from which we want the dictionary of parameters.

    Returns:
        `tuple[Dict, Dict]`: The dictionary(ies) that will be used to instantiate the configuration object.
        An empty dictionary is returned in first position when no config dict could be resolved.
    """
    # Keep an untouched copy: `_get_config_dict` pops entries from the kwargs it receives, and a second
    # lookup (below) needs the full set again
    original_kwargs = copy.deepcopy(kwargs)
    # Get config dict associated with the base config file
    config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs)
    if config_dict is None:
        return {}, kwargs
    if "_commit_hash" in config_dict:
        original_kwargs["_commit_hash"] = config_dict["_commit_hash"]

    # That config file may point us toward another config file to use.
    if "configuration_files" in config_dict:
        configuration_file = get_configuration_file(config_dict["configuration_files"])
        config_dict, kwargs = cls._get_config_dict(
            pretrained_model_name_or_path, _configuration_file=configuration_file, **original_kwargs
        )

    return config_dict, kwargs
def _get_config_dict(
    cls, pretrained_model_name_or_path: str | os.PathLike, **kwargs
) -> tuple[dict[str, Any], dict[str, Any]]:
    """
    Locate the configuration file for `pretrained_model_name_or_path`, load it and return it as a dictionary.

    Loading-related options (`cache_dir`, `force_download`, `proxies`, `token`, `local_files_only`, `revision`,
    `trust_remote_code`, `subfolder`, `_from_pipeline`, `_from_auto`, `_commit_hash`) are popped from `kwargs`;
    whatever is left is returned next to the dictionary.

    Returns:
        `tuple[Dict, Dict]`: The loaded configuration dictionary and the remaining kwargs. The first element is
        `None` when `cached_file` did not resolve any file.

    Raises:
        `OSError`: If the file cannot be resolved or its content cannot be decoded as JSON.
    """
    cache_dir = kwargs.pop("cache_dir", None)
    force_download = kwargs.pop("force_download", False)
    proxies = kwargs.pop("proxies", None)
    token = kwargs.pop("token", None)
    local_files_only = kwargs.pop("local_files_only", False)
    revision = kwargs.pop("revision", None)
    trust_remote_code = kwargs.pop("trust_remote_code", None)
    subfolder = kwargs.pop("subfolder", "")
    from_pipeline = kwargs.pop("_from_pipeline", None)
    from_auto_class = kwargs.pop("_from_auto", False)
    commit_hash = kwargs.pop("_commit_hash", None)
    # `gguf_file` is only read here, so it is still present in the kwargs that are returned.
    gguf_file = kwargs.get("gguf_file")

    if trust_remote_code is True:
        logger.warning(
            "The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is"
            " ignored."
        )

    user_agent = {"file_type": "config", "from_auto_class": from_auto_class}
    if from_pipeline is not None:
        user_agent["using_pipeline"] = from_pipeline

    pretrained_model_name_or_path = str(pretrained_model_name_or_path)

    is_local = os.path.isdir(pretrained_model_name_or_path)
    # NOTE(review): the join puts `subfolder` *before* the path, while the resolved file below ignores
    # `subfolder` entirely -- confirm this ordering is intended.
    if os.path.isfile(os.path.join(subfolder, pretrained_model_name_or_path)):
        # Special case when pretrained_model_name_or_path is a local file
        resolved_config_file = pretrained_model_name_or_path
        is_local = True
    else:
        configuration_file = kwargs.pop("_configuration_file", CONFIG_NAME) if gguf_file is None else gguf_file

        try:
            # Load from local folder or from cache or download from model Hub and cache
            resolved_config_file = cached_file(
                pretrained_model_name_or_path,
                configuration_file,
                cache_dir=cache_dir,
                force_download=force_download,
                proxies=proxies,
                local_files_only=local_files_only,
                token=token,
                user_agent=user_agent,
                revision=revision,
                subfolder=subfolder,
                _commit_hash=commit_hash,
            )
            if resolved_config_file is None:
                return None, kwargs
            commit_hash = extract_commit_hash(resolved_config_file, commit_hash)
        except OSError:
            # Raise any environment error raised by `cached_file`. It will have a helpful error message adapted to
            # the original exception.
            raise
        except Exception as err:
            # For any other exception, we throw a generic error, keeping the original one as its explicit cause.
            raise OSError(
                f"Can't load the configuration of '{pretrained_model_name_or_path}'. If you were trying to load it"
                " from 'https://huggingface.co/models', make sure you don't have a local directory with the same"
                f" name. Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory"
                f" containing a {configuration_file} file"
            ) from err

    try:
        if gguf_file:
            config_dict = load_gguf_checkpoint(resolved_config_file, return_tensors=False)["config"]
        else:
            # Load config dict
            config_dict = cls._dict_from_json_file(resolved_config_file)

        config_dict["_commit_hash"] = commit_hash
    except (json.JSONDecodeError, UnicodeDecodeError) as err:
        raise OSError(
            f"It looks like the config file at '{resolved_config_file}' is not a valid JSON file."
        ) from err

    if is_local:
        logger.info(f"loading configuration file {resolved_config_file}")
    else:
        logger.info(f"loading configuration file {configuration_file} from cache at {resolved_config_file}")

    # timm models are not saved with the model_type in the config file
    if "model_type" not in config_dict and is_timm_config_dict(config_dict):
        config_dict["model_type"] = "timm_wrapper"

    return config_dict, kwargs
def from_dict(
    cls: type[SpecificPreTrainedConfigType], config_dict: dict[str, Any], **kwargs
) -> SpecificPreTrainedConfigType:
    """
    Instantiates a [`PreTrainedConfig`] from a Python dictionary of parameters.

    Args:
        config_dict (`dict[str, Any]`):
            Dictionary that will be used to instantiate the configuration object. Such a dictionary can be
            retrieved from a pretrained checkpoint by leveraging the [`~PreTrainedConfig.get_config_dict`] method.
            Note that recognised kwargs are written into this dictionary in place.
        kwargs (`dict[str, Any]`):
            Additional parameters from which to initialize the configuration object. Pass
            `return_unused_kwargs=True` to also get back the kwargs that were not consumed.

    Returns:
        [`PreTrainedConfig`]: The configuration object instantiated from those parameters, or a
        `(config, unused_kwargs)` tuple when `return_unused_kwargs` is set.
    """
    return_unused_kwargs = kwargs.pop("return_unused_kwargs", False)

    # The commit hash might have been updated in the `config_dict`, we don't want the kwargs to erase that update.
    # A plain assignment is required: `setdefault` would be a no-op since the key is known to be in `kwargs`.
    if "_commit_hash" in kwargs and "_commit_hash" in config_dict:
        kwargs["_commit_hash"] = config_dict["_commit_hash"]

    # To remove arg here are those passed along for our internal telemetry but we still need to remove them
    to_remove = ["_from_auto", "_from_pipeline"]
    valid_fields = [
        "num_labels",
        "attn_implementation",
        "experts_implementation",
        "output_attentions",
        "torch_dtype",
        "dtype",
        "name_or_path",
    ]
    for key, value in kwargs.items():
        if key in valid_fields:
            if key not in ["torch_dtype", "dtype"]:
                config_dict[key] = value
                to_remove.append(key)
            elif value != "auto":
                # dtype-like kwargs are forwarded unless they are the "auto" placeholder; they are not marked
                # for removal here.
                config_dict[key] = value

    config = cls(**config_dict)

    for key, value in kwargs.items():
        if hasattr(config, key):
            current_attr = getattr(config, key)
            # To authorize passing a custom subconfig as kwarg in models that have nested configs.
            # We need to update only custom kwarg values instead and keep other attr in subconfig.
            if isinstance(current_attr, PreTrainedConfig) and isinstance(value, dict):
                current_attr_updated = current_attr.to_dict()
                current_attr_updated.update(value)
                value = current_attr.__class__(**current_attr_updated)
            setattr(config, key, value)
            to_remove.append(key)

    for key in to_remove:
        kwargs.pop(key, None)

    logger.info(f"Model config {config}")
    if return_unused_kwargs:
        return config, kwargs
    return config
def from_json_file(
    cls: type[SpecificPreTrainedConfigType], json_file: str | os.PathLike
) -> SpecificPreTrainedConfigType:
    """
    Build a [`PreTrainedConfig`] from a JSON file of parameters.

    Args:
        json_file (`str` or `os.PathLike`):
            Path to the JSON file containing the parameters.

    Returns:
        [`PreTrainedConfig`]: The configuration object created from the content of that file.
    """
    # Every top-level JSON entry becomes a keyword argument of the constructor.
    return cls(**cls._dict_from_json_file(json_file))
| def _dict_from_json_file(cls, json_file: str | os.PathLike): | |
| with open(json_file, encoding="utf-8") as reader: | |
| text = reader.read() | |
| config_dict = json.loads(text) | |
| return cls._decode_special_floats(config_dict) | |
| def _encode_special_floats(cls, obj: Any) -> Any: | |
| """ | |
| Iterates over the passed object and encode specific floats that cannot be JSON-serialized. Python's JSON | |
| engine saves floats like `Infinity` (+/-) or `NaN` which are not compatible with other JSON engines. | |
| It serializes floats like `Infinity` as an object: `{'__float__': Infinity}`. | |
| """ | |
| if isinstance(obj, float): | |
| if math.isnan(obj): | |
| return {_FLOAT_TAG_KEY: "NaN"} | |
| if obj == float("inf"): | |
| return {_FLOAT_TAG_KEY: "Infinity"} | |
| if obj == float("-inf"): | |
| return {_FLOAT_TAG_KEY: "-Infinity"} | |
| return obj | |
| if isinstance(obj, dict): | |
| return {k: cls._encode_special_floats(v) for k, v in obj.items()} | |
| if isinstance(obj, (list, tuple)): | |
| return [cls._encode_special_floats(v) for v in obj] | |
| return obj | |
| def _decode_special_floats(cls, obj: Any) -> Any: | |
| """ | |
| Iterates over the passed object and decode specific floats that cannot be JSON-serialized. Python's JSON | |
| engine saves floats like `Infinity` (+/-) or `NaN` which are not compatible with other JSON engines. | |
| This method deserializes objects like `{'__float__': Infinity}` to their float values like `Infinity`. | |
| """ | |
| if isinstance(obj, dict): | |
| if set(obj.keys()) == {_FLOAT_TAG_KEY} and isinstance(obj[_FLOAT_TAG_KEY], str): | |
| tag = obj[_FLOAT_TAG_KEY] | |
| if tag in _FLOAT_TAG_VALUES: | |
| return _FLOAT_TAG_VALUES[tag] | |
| return obj | |
| return {k: cls._decode_special_floats(v) for k, v in obj.items()} | |
| if isinstance(obj, list): | |
| return [cls._decode_special_floats(v) for v in obj] | |
| return obj | |
def __eq__(self, other):
    """Two configs are equal when `other` is a `PreTrainedConfig` holding the same instance attributes."""
    if not isinstance(other, PreTrainedConfig):
        return False
    return self.__dict__ == other.__dict__
| def __repr__(self): | |
| return f"{self.__class__.__name__} {self.to_json_string()}" | |
| def __iter__(self): | |
| yield from self.__dict__ | |
def to_diff_dict(self) -> dict[str, Any]:
    """
    Serializes this instance to a Python dictionary, dropping the attributes whose value matches the defaults so
    that the result is easier to read.

    Returns:
        dict[str, Any]: Dictionary of the attributes of this configuration instance that differ from the defaults.
    """
    full_dict = self.to_dict()

    # Defaults of the base class, taken from a fresh `PreTrainedConfig`.
    base_defaults = PreTrainedConfig().to_dict()

    # Defaults of the concrete class, when it can be built without arguments.
    class_defaults = {} if self.has_no_defaults_at_init else self.__class__().to_dict()

    diff_dict = {}
    for key, value in full_dict.items():
        is_nested_config = (
            isinstance(getattr(self, key, None), PreTrainedConfig)
            and key in class_defaults
            and isinstance(class_defaults[key], dict)
        )
        if is_nested_config:
            # Nested configs are diffed recursively.
            nested_diff = recursive_diff_dict(value, base_defaults, config_obj=getattr(self, key, None))
            if "model_type" in value:
                # Needs to be set even if it's not in the diff
                nested_diff["model_type"] = value["model_type"]
            diff_dict[key] = nested_diff
            continue

        # Keep the entry when it is unknown to the base defaults, is one of the always-kept keys, or differs
        # from either the base default or the class default.
        if (
            key not in base_defaults
            or key in ("transformers_version", "vocab_file")
            or value != base_defaults[key]
            or value != class_defaults.get(key, value)
        ):
            diff_dict[key] = value

    self._remove_keys_not_serialized(diff_dict)

    # Key removed only in diff dict
    diff_dict.pop("_name_or_path", None)

    if hasattr(self, "quantization_config"):
        quantization_config = self.quantization_config
        if quantization_config is None or isinstance(quantization_config, dict):
            diff_dict["quantization_config"] = quantization_config
        else:
            diff_dict["quantization_config"] = quantization_config.to_dict()

    self.dict_dtype_to_str(diff_dict)
    return diff_dict
def to_dict(self) -> dict[str, Any]:
    """
    Serializes this instance to a Python dictionary.

    Returns:
        `dict[str, Any]`: Dictionary of all the attributes that make up this configuration instance.
    """

    def tuples_to_lists(item):
        # Recursively turn (nested) tuples into lists; any other value is returned as is.
        if not isinstance(item, tuple):
            return item
        return [tuples_to_lists(element) for element in item]

    output = copy.deepcopy(self.__dict__)
    config_class = self.__class__
    if hasattr(config_class, "model_type"):
        output["model_type"] = config_class.model_type

    # Transformers version when serializing the model
    output["transformers_version"] = __version__

    # Pop "kwargs" since they are unpacked and set in the post init
    output.pop("kwargs", None)

    for name in output:
        item = output[name]
        if isinstance(item, PreTrainedConfig):
            # Nested configs (like CLIP) are serialized too, without repeating the library version.
            nested = item.to_dict()
            del nested["transformers_version"]
            output[name] = nested
        elif isinstance(item, tuple):
            # Some models have defaults as tuples because dataclass
            # doesn't allow mutables. Let's convert back to `list`
            output[name] = tuples_to_lists(item)

    self._remove_keys_not_serialized(output)

    if hasattr(self, "quantization_config"):
        quantization_config = self.quantization_config
        if quantization_config is None or isinstance(quantization_config, dict):
            output["quantization_config"] = quantization_config
        else:
            output["quantization_config"] = quantization_config.to_dict()

    self.dict_dtype_to_str(output)
    return output
def to_json_string(self, use_diff: bool = True) -> str:
    """
    Serializes this instance to a JSON string.

    Args:
        use_diff (`bool`, *optional*, defaults to `True`):
            If set to `True`, only the difference between the config instance and the default `PreTrainedConfig()`
            is serialized to JSON string.

    Returns:
        `str`: String containing all the attributes that make up this configuration instance in JSON format,
        terminated by a newline.
    """
    # Only the literal `True` selects the diff; any other value falls back to the full dictionary.
    payload = self.to_diff_dict() if use_diff is True else self.to_dict()

    # Handle +/-Infinity and NaNs
    payload = self._encode_special_floats(payload)

    serialized = json.dumps(payload, indent=2, sort_keys=True)
    return serialized + "\n"
| def to_json_file(self, json_file_path: str | os.PathLike, use_diff: bool = True): | |
| """ | |
| Save this instance to a JSON file. | |
| Args: | |
| json_file_path (`str` or `os.PathLike`): | |
| Path to the JSON file in which this configuration instance's parameters will be saved. | |
| use_diff (`bool`, *optional*, defaults to `True`): | |
| If set to `True`, only the difference between the config instance and the default `PreTrainedConfig()` | |
| is serialized to JSON file. | |
| """ | |
| with open(json_file_path, "w", encoding="utf-8") as writer: | |
| writer.write(self.to_json_string(use_diff=use_diff)) | |
def update(self, config_dict: dict[str, Any]):
    """
    Sets every entry of `config_dict` as an attribute on this instance.

    Args:
        config_dict (`dict[str, Any]`): Dictionary of attributes that should be updated for this class.
    """
    for attribute_name in config_dict:
        setattr(self, attribute_name, config_dict[attribute_name])
def update_from_string(self, update_str: str):
    """
    Updates attributes of this class with attributes from `update_str`.

    The expected format is ints, floats and strings as is, and for booleans use `true` or `false`. For example:
    "n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index"

    The keys to change have to already exist in the config object. Each `key=value` pair is split on the first
    `=` only, so a value may itself contain `=`.

    Args:
        update_str (`str`): String with attributes that should be updated for this class.

    Raises:
        `ValueError`: If a key does not exist on the config, a pair has no `=`, or a value cannot be converted to
            the type of the existing attribute.
        `TypeError`: If the existing attribute is not an int, float, bool or string.
    """
    # Split on the first "=" only so that values such as "a=b" are kept intact.
    updates = dict(pair.split("=", 1) for pair in update_str.split(","))
    for k, v in updates.items():
        if not hasattr(self, k):
            raise ValueError(f"key {k} isn't in the original config dict")

        old_v = getattr(self, k)
        # `bool` must be tested before `int` because `bool` is a subclass of `int`.
        if isinstance(old_v, bool):
            if v.lower() in ["true", "1", "y", "yes"]:
                v = True
            elif v.lower() in ["false", "0", "n", "no"]:
                v = False
            else:
                raise ValueError(f"can't derive true or false from {v} (key {k})")
        elif isinstance(old_v, int):
            v = int(v)
        elif isinstance(old_v, float):
            v = float(v)
        elif not isinstance(old_v, str):
            raise TypeError(
                f"You can only update int, float, bool or string values in the config, got {v} for key {k}"
            )

        setattr(self, k, v)
def dict_dtype_to_str(self, d: dict[str, Any]) -> None:
    """
    Checks whether the passed dictionary and its nested dicts have a *dtype* key and if it's not None,
    converts torch.dtype to a string of just the type. For example, `torch.float32` get converted into *"float32"*
    string, which can then be stored in the json format.

    The dictionary is modified in place. The type name is taken as the last dot-separated segment of the value's
    string form, for both a single dtype and a dictionary of dtypes.
    """
    dtype_value = d.get("dtype")
    if dtype_value is not None:
        if isinstance(dtype_value, dict):
            d["dtype"] = {k: str(v).split(".")[-1] for k, v in dtype_value.items()}
        # models like Emu3 can have "dtype" as token in config's vocabulary map,
        # so we also exclude int type here to avoid error in this special case.
        elif not isinstance(dtype_value, (str, int)):
            # Use the last segment (as in the dict branch) so that a string form without a dot does not raise.
            d["dtype"] = str(dtype_value).split(".")[-1]
    for value in d.values():
        if isinstance(value, dict):
            self.dict_dtype_to_str(value)
| def _remove_keys_not_serialized(self, d: dict[str, Any]) -> None: | |
| """ | |
| Checks and removes if there are any keys in the dict that should not be serialized when saving the config. | |
| Runs recursive check on the dict, to remove from all sub configs. | |
| """ | |
| for key_to_remove in [ | |
| "_is_quantized", | |
| "_auto_class", | |
| "_commit_hash", | |
| "_attn_implementation_internal", | |
| "_experts_implementation_internal", | |
| "ignore_keys_at_rope_validation", | |
| "base_model_tp_plan", | |
| "base_model_pp_plan", | |
| ]: | |
| d.pop(key_to_remove, None) | |
| if "_output_attentions" in d: | |
| d["output_attentions"] = d.pop("_output_attentions") | |
| for value in d.values(): | |
| if isinstance(value, dict): | |
| self._remove_keys_not_serialized(value) | |
def register_for_auto_class(cls, auto_class="AutoConfig"):
    """
    Register this class with a given auto class. This should only be used for custom configurations as the ones in
    the library are already mapped with `AutoConfig`.

    Args:
        auto_class (`str` or `type`, *optional*, defaults to `"AutoConfig"`):
            The auto class to register this new configuration with.

    Raises:
        `ValueError`: If `transformers.models.auto` has no attribute with that name.
    """
    import transformers.models.auto as auto_module

    # Accept either the auto class itself or its name.
    auto_class_name = auto_class if isinstance(auto_class, str) else auto_class.__name__

    if not hasattr(auto_module, auto_class_name):
        raise ValueError(f"{auto_class_name} is not a valid auto class.")

    cls._auto_class = auto_class_name
def _get_generation_parameters(self) -> dict[str, Any]:
    """
    Collects the generation parameters that are set on this `PreTrainedConfig` instance.

    A key listed by `GenerationConfig._get_default_generation_params()` is reported when the instance has a
    non-`None` attribute of that name and the key is not part of the class' default config. `use_cache` is never
    reported.

    Returns:
        `dict[str, Any]`: The generation parameters found on this instance, keyed by name.
    """
    class_defaults = {} if self.has_no_defaults_at_init else self.__class__().to_dict()

    found_params = {}
    for param_name in GenerationConfig._get_default_generation_params().keys():
        # `use_cache` is a common key for most models, so it is not treated as a generation parameter here.
        if param_name == "use_cache":
            continue
        if not hasattr(self, param_name):
            continue
        if getattr(self, param_name) is None or param_name in class_defaults:
            continue
        found_params[param_name] = getattr(self, param_name)

    return found_params
def get_text_config(self, decoder=None, encoder=None) -> "PreTrainedConfig":
    """
    Returns the text config related to the text input (encoder) or text output (decoder) of the model. The
    `decoder` and `encoder` input arguments can be used to specify which end of the model we are interested in,
    which is useful on models that have both text input and output modalities.

    There are three possible outcomes of using this method:
    1. On most models, it returns the original config instance itself.
    2. On newer (2024+) composite models, it returns the text section of the config, which is nested under a set
        of valid names.
    3. On older (2023-) composite models, it returns a copy whose `encoder_*` (or `decoder_*`) attributes have
        been renamed to their prefix-less equivalents.

    Args:
        decoder (`Optional[bool]`, *optional*):
            If set to `True`, then only search for decoder config names.
        encoder (`Optional[bool]`, *optional*):
            If set to `True`, then only search for encoder config names.

    Raises:
        `ValueError`: If more than one candidate text config is set on this instance.
    """
    decoder_names = ("decoder", "generator", "text_config")
    encoder_names = ("text_encoder",)

    # both unset or both set -> search all possible names
    search_everything = decoder == encoder
    if search_everything:
        candidate_names = encoder_names + decoder_names
    elif decoder:
        candidate_names = decoder_names
    else:
        candidate_names = encoder_names

    valid_text_config_names = [
        name for name in candidate_names if hasattr(self, name) and getattr(self, name, None) is not None
    ]

    if len(valid_text_config_names) > 1:
        raise ValueError(
            f"Multiple valid text configs were found in the model config: {valid_text_config_names}. In this "
            "case, using `get_text_config()` would be ambiguous. Please specify the desired text config directly, "
            "e.g. `text_config = config.sub_config_name`"
        )

    config_to_return = getattr(self, valid_text_config_names[0]) if valid_text_config_names else self

    # Only legacy models with a flat config structure need the renaming below, and only when a single side
    # (encoder or decoder) was requested.
    is_flat_legacy = not search_everything and not valid_text_config_names and config_to_return.is_encoder_decoder
    if not is_flat_legacy:
        return config_to_return

    config_to_return = copy.deepcopy(config_to_return)
    kept_prefix = "decoder" if decoder else "encoder"
    renamed_keys = {
        # [encoder/decoder]_layers -> num_hidden_layers
        kept_prefix + "_layers": "num_hidden_layers",
        # [encoder/decoder]_attention_heads -> num_attention_heads
        kept_prefix + "_attention_heads": "num_attention_heads",
    }
    for key in config_to_return.to_dict():
        # NOTE: keys of the other side are not discarded; they are simply left as they are.
        if not key.startswith(kept_prefix):
            continue

        # e.g. encoder_hidden_act -> hidden_act
        target_key = renamed_keys[key] if key in renamed_keys else key[len(kept_prefix) + 1 :]

        # Does the class map the new key into a different attribute name at read time? if so, let's write
        # into that attribute instead
        if target_key in config_to_return.attribute_map:
            target_key = config_to_return.attribute_map[target_key]

        moved_value = getattr(config_to_return, key)
        delattr(config_to_return, key)
        setattr(config_to_return, target_key, moved_value)

    return config_to_return
def get_configuration_file(configuration_files: list[str]) -> str:
    """
    Get the configuration file to use for this version of transformers.

    Files named `config.<version>.json` are considered; the one with the highest version that is not newer than
    the installed transformers version is selected, falling back to `CONFIG_NAME` when none qualifies.

    Args:
        configuration_files (`list[str]`): The list of available configuration files.

    Returns:
        `str`: The configuration file to use.
    """
    configuration_files_map = {}
    for file_name in configuration_files:
        if file_name.startswith("config.") and file_name.endswith(".json") and file_name != "config.json":
            v = file_name.removeprefix("config.").removesuffix(".json")
            configuration_files_map[v] = file_name

    # Sort by parsed version, not as plain strings: lexicographic order would put "4.10.0" before "4.9.0" and
    # make the early `break` below skip valid candidates.
    available_versions = sorted(configuration_files_map, key=version.parse)

    # Defaults to FULL_CONFIGURATION_FILE and then try to look at some newer versions.
    configuration_file = CONFIG_NAME
    transformers_version = version.parse(__version__)
    for v in available_versions:
        if version.parse(v) <= transformers_version:
            configuration_file = configuration_files_map[v]
        else:
            # No point going further since the versions are sorted.
            break

    return configuration_file
def recursive_diff_dict(dict_a, dict_b, config_obj=None):
    """
    Helper function to recursively take the diff between two nested dictionaries. The resulting diff only contains the
    values from `dict_a` that are different from the reference values.

    Args:
        dict_a: The dictionary to filter.
        dict_b: The default config dictionary. We want to remove values that are in this one.
        config_obj: Optional config object that `dict_a` was produced from. When given, a fresh instance of its
            class provides the reference values; keys missing from those reference values (always the case when
            `config_obj` is `None`) are compared against `dict_b` instead.

    Returns:
        `dict`: The entries of `dict_a` that are absent from `dict_b` or differ from their reference value.
    """
    diff = {}
    default = config_obj.__class__().to_dict() if config_obj is not None else {}
    for key, value in dict_a.items():
        if key not in dict_b:
            diff[key] = value
            continue

        obj_value = getattr(config_obj, str(key), None)
        if isinstance(dict_b[key], dict) and isinstance(obj_value, PreTrainedConfig):
            # Nested config: diff it against the matching section of the defaults.
            diff[key] = recursive_diff_dict(value, dict_b[key], config_obj=obj_value)
            continue

        # Prefer the class default; fall back to `dict_b` instead of raising `KeyError` when there is none.
        reference = default[key] if key in default else dict_b[key]
        if value != reference:
            diff[key] = value
    return diff
# Give `PreTrainedConfig` its own copy of `push_to_hub` and, when the copy carries a docstring, fill in the
# placeholders of that docstring with config-specific wording.
PreTrainedConfig.push_to_hub = copy_func(PreTrainedConfig.push_to_hub)
if PreTrainedConfig.push_to_hub.__doc__ is not None:
    PreTrainedConfig.push_to_hub.__doc__ = PreTrainedConfig.push_to_hub.__doc__.format(
        object="config",
        object_class="AutoConfig",
        object_files="configuration file",
    )

# The alias is only here for BC - we did not have the correct CamelCasing before
PretrainedConfig = PreTrainedConfig
def layer_type_validation(layer_types: list[str], num_hidden_layers: int | None = None, attention: bool = True):
    """
    Deprecated validation helper for `layer_types`; a deprecation warning is logged on every call.

    Args:
        layer_types (`list[str]`): Layer types to check against `ALLOWED_LAYER_TYPES`.
        num_hidden_layers (`int`, *optional*): When given, must be equal to `len(layer_types)`.
        attention (`bool`, *optional*, defaults to `True`): Not used by this function.

    Raises:
        `ValueError`: If an entry is not an allowed layer type, or if the number of entries does not match
            `num_hidden_layers`.
    """
    logger.warning(
        "`layer_type_validation` is deprecated and will be removed in v5.20. "
        "Use `PreTrainedConfig.validate_layer_type` instead"
    )

    if any(layer_type not in ALLOWED_LAYER_TYPES for layer_type in layer_types):
        raise ValueError(f"The `layer_types` entries must be in {ALLOWED_LAYER_TYPES}")

    if num_hidden_layers is None:
        return

    if num_hidden_layers != len(layer_types):
        raise ValueError(
            f"`num_hidden_layers` ({num_hidden_layers}) must be equal to the number of layer types "
            f"({len(layer_types)})"
        )