from transformers import PretrainedConfig
|
|
|
| repo_name = "BeardedMonster/SabiYarn-125M"
|
|
|
class GPTJXConfig(PretrainedConfig):
    """Configuration class for the GPT-JX ("nanogpt-j") model.

    Holds the model hyperparameters and plugs into the Hugging Face
    ``transformers`` config machinery via :class:`PretrainedConfig`,
    which provides serialization (``to_dict`` / ``from_pretrained``)
    and handling of common keyword arguments.
    """

    # Identifier used by transformers' Auto* classes to map this config
    # to its model implementation.
    model_type = "nanogpt-j"

    def __init__(
        self,
        block_size: int = 1024,
        vocab_size: int = 52050,
        n_layer: int = 12,
        n_head: int = 12,
        n_embd: int = 768,
        dropout: float = 0.0,
        bias: bool = False,
        **kwargs,
    ):
        """Initialize the configuration.

        Args:
            block_size: Maximum sequence length (context window).
            vocab_size: Size of the token vocabulary.
            n_layer: Number of transformer blocks.
            n_head: Number of attention heads per block.
            n_embd: Embedding / hidden dimension.
            dropout: Dropout probability (0.0 disables dropout).
            bias: Whether linear/LayerNorm layers use bias terms.
            **kwargs: Extra arguments forwarded to ``PretrainedConfig``
                (e.g. ``bos_token_id``, ``eos_token_id``).
        """
        self.block_size = block_size
        self.vocab_size = vocab_size
        self.n_layer = n_layer
        self.n_head = n_head
        self.n_embd = n_embd
        self.dropout = dropout
        self.bias = bias
        # NOTE: super().__init__ is deliberately called after the
        # attribute assignments, matching the original control flow so
        # PretrainedConfig's kwargs handling sees the same state.
        super().__init__(**kwargs)