| """ ChatGLM model configuration """ |
|
|
| from transformers.configuration_utils import PretrainedConfig |
| from transformers.utils import logging |
|
|
| logger = logging.get_logger(__name__) |
|
|
|
|
class ChatGLMConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`~ChatGLMModel`].
    It is used to instantiate a ChatGLM model according to the specified arguments, defining the model
    architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of
    the ChatGLM-6B [THUDM/chatglm-6b](https://huggingface.co/THUDM/chatglm-6b) architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        vocab_size (`int`, *optional*, defaults to 150528):
            Vocabulary size of the ChatGLM-6B model. Defines the number of different tokens that can be represented
            by the `inputs_ids` passed when calling [`~ChatGLMModel`].
        hidden_size (`int`, *optional*, defaults to 4096):
            Dimension of the encoder layers.
        num_layers (`int`, *optional*, defaults to 28):
            Number of hidden layers in the Transformer encoder.
        num_attention_heads (`int`, *optional*, defaults to 32):
            Number of attention heads for each attention layer in the Transformer encoder.
        inner_hidden_size (`int`, *optional*, defaults to 16384):
            Dimension of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
        max_sequence_length (`int`, *optional*, defaults to 2048):
            The maximum sequence length that this model might ever be used with.
        layernorm_epsilon (`float`, *optional*, defaults to 1e-5):
            The epsilon used by the layer normalization layers.
        use_cache (`bool`, *optional*, defaults to `False`):
            Whether the model should return the last key/values attentions (not used by all models).
        bos_token_id (`int`, *optional*, defaults to 150004):
            Id of the beginning-of-sequence token.
        eos_token_id (`int`, *optional*, defaults to 150005):
            Id of the end-of-sequence token.
        mask_token_id (`int`, *optional*, defaults to 150000):
            Id of the `[MASK]` token.
        gmask_token_id (`int`, *optional*, defaults to 150001):
            Id of the `[gMASK]` token.
        pad_token_id (`int`, *optional*, defaults to 0):
            Id of the padding token.
        position_encoding_2d (`bool`, *optional*, defaults to `True`):
            Whether to use ChatGLM's two-dimensional position encoding (block position plus in-block position).
        quantization_bit (`int`, *optional*, defaults to 0):
            Bit width used for weight quantization; 0 disables quantization.
        pre_seq_len (`int`, *optional*):
            Length of the trainable prefix used for P-Tuning v2. `None` disables prefix tuning.
        prefix_projection (`bool`, *optional*, defaults to `False`):
            Whether to pass the prefix embeddings through an additional projection MLP instead of using them
            directly.
    Example:

    ```python
    >>> from configuration_chatglm import ChatGLMConfig
    >>> from modeling_chatglm import ChatGLMModel

    >>> # Initializing a ChatGLM-6B THUDM/chatglm-6b style configuration
    >>> configuration = ChatGLMConfig()

    >>> # Initializing a model from the THUDM/chatglm-6b style configuration
    >>> model = ChatGLMModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```
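
    The P-Tuning v2 and quantization fields (`pre_seq_len`, `prefix_projection`, `quantization_bit`) can be passed
    the same way. A minimal sketch; the specific values are illustrative, not recommendations:

    ```python
    >>> # Configuring a 128-token trainable prefix and 4-bit weight quantization (illustrative values)
    >>> configuration = ChatGLMConfig(pre_seq_len=128, prefix_projection=False, quantization_bit=4)
    ```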
| """ |
| model_type = "chatglm" |
|
|
    def __init__(
        self,
        vocab_size=150528,
        hidden_size=4096,
        num_layers=28,
        num_attention_heads=32,
        layernorm_epsilon=1e-5,
        use_cache=False,
        bos_token_id=150004,
        eos_token_id=150005,
        mask_token_id=150000,
        gmask_token_id=150001,
        pad_token_id=0,
        max_sequence_length=2048,
        inner_hidden_size=16384,
        position_encoding_2d=True,
        quantization_bit=0,
        pre_seq_len=None,
        prefix_projection=False,
        **kwargs
    ):
        self.num_layers = num_layers
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_attention_heads = num_attention_heads
        self.max_sequence_length = max_sequence_length
        self.layernorm_epsilon = layernorm_epsilon
        self.inner_hidden_size = inner_hidden_size
        self.use_cache = use_cache
        self.bos_token_id = bos_token_id
        self.eos_token_id = eos_token_id
        self.pad_token_id = pad_token_id
        self.mask_token_id = mask_token_id
        self.gmask_token_id = gmask_token_id
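        # 2D position encoding gives each token a block position id and an in-block position id
        # (ChatGLM-6B's scheme) instead of a single absolute position.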
        self.position_encoding_2d = position_encoding_2d
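        # Bit width for weight quantization (typically 4 or 8); 0 leaves the weights unquantized.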
        self.quantization_bit = quantization_bit
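        # P-Tuning v2: number of trainable prefix tokens (None disables prefix tuning) and whether
        # the prefix embeddings go through an extra projection MLP.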
        self.pre_seq_len = pre_seq_len
        self.prefix_projection = prefix_projection

        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            **kwargs
        )