| """ |
| Continue-1-OSS Model Configuration |
| """ |
|
|
| from transformers import LlamaConfig |
| from transformers.utils import logging |
|
|
| logger = logging.get_logger(__name__) |


class Continue1Config(LlamaConfig):
    """Configuration for the Continue-1-OSS model.

    A thin LlamaConfig subclass that pins the Continue-1-OSS defaults
    (vocabulary and hidden sizes, grouped-query attention layout, RoPE
    settings) and records ``head_dim`` and ``mlp_bias`` explicitly.
    """

    model_type = "continue_oss"

    def __init__(
        self,
        vocab_size=156940,
        hidden_size=3072,
        intermediate_size=8192,
        num_hidden_layers=28,
        num_attention_heads=24,
        num_key_value_heads=8,
        head_dim=128,
        hidden_act="silu",
        max_position_embeddings=131072,
        initializer_range=0.02,
        rms_norm_eps=1e-05,
        use_cache=True,
        pad_token_id=None,
        bos_token_id=128000,
        eos_token_id=128001,
        pretraining_tp=1,
        tie_word_embeddings=True,
        rope_theta=500000.0,
        rope_scaling=None,
        attention_bias=False,
        attention_dropout=0.0,
        mlp_bias=False,
        **kwargs,
    ):
        super().__init__(
            vocab_size=vocab_size,
            hidden_size=hidden_size,
            intermediate_size=intermediate_size,
            num_hidden_layers=num_hidden_layers,
            num_attention_heads=num_attention_heads,
            num_key_value_heads=num_key_value_heads,
            hidden_act=hidden_act,
            max_position_embeddings=max_position_embeddings,
            initializer_range=initializer_range,
            rms_norm_eps=rms_norm_eps,
            use_cache=use_cache,
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            pretraining_tp=pretraining_tp,
            tie_word_embeddings=tie_word_embeddings,
            rope_theta=rope_theta,
            rope_scaling=rope_scaling,
            attention_bias=attention_bias,
            attention_dropout=attention_dropout,
            **kwargs,
        )
        # Set these after super().__init__ so the values are kept even on
        # transformers versions where LlamaConfig does not accept head_dim
        # or mlp_bias as keyword arguments.
        self.head_dim = head_dim
        self.mlp_bias = mlp_bias


# Make the config discoverable through AutoConfig for checkpoints that ship
# this module as custom code (i.e. loaded with trust_remote_code=True).
Continue1Config.register_for_auto_class()
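

# A minimal usage sketch, not part of the model code. It only exercises the
# defaults defined above; the Hub repo id in the commented-out lines is an
# assumption for illustration, not a confirmed checkpoint name.
if __name__ == "__main__":
    config = Continue1Config()

    # Grouped-query attention layout implied by the defaults:
    # 24 query heads share 8 key/value heads, i.e. 3 query heads per KV group.
    print(config.num_attention_heads // config.num_key_value_heads)  # -> 3
    print(config.head_dim)  # -> 128 (== hidden_size // num_attention_heads)

    # After register_for_auto_class(), save_pretrained records an "auto_map"
    # entry in config.json, which is what lets AutoConfig resolve this class
    # from a checkpoint later (hypothetical repo id):
    # from transformers import AutoConfig
    # config = AutoConfig.from_pretrained(
    #     "continuedev/continue-1-oss", trust_remote_code=True
    # )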