tinyMind-SFT / configuration_tinymind.py
HenrySentinel's picture
Add configuration_tinymind.py
3623324 verified
raw
history blame contribute delete
874 Bytes
"""TinyMind configuration."""
from transformers import PretrainedConfig
class TinyMindConfig(PretrainedConfig):
    """Configuration object holding the TinyMind hyperparameters.

    Each constructor argument is stored on the instance under its own name.
    Four of them are additionally stored under a second attribute name that
    follows the Hugging Face naming convention, so the same value can be read
    under either spelling.

    Args:
        vocab_size: Stored as ``vocab_size``.
        n_embd: Stored as ``n_embd`` and as ``hidden_size``.
        n_heads: Stored as ``n_heads`` and as ``num_attention_heads``.
        n_layers: Stored as ``n_layers`` and as ``num_hidden_layers``.
        max_seq_len: Stored as ``max_seq_len`` and as
            ``max_position_embeddings``.
        dropout: Stored as ``dropout``.
        **kwargs: Passed through unchanged to ``PretrainedConfig.__init__``.
    """

    model_type = "tiny_smart_llm"

    def __init__(
        self,
        vocab_size: int = 50257,
        n_embd: int = 256,
        n_heads: int = 8,
        n_layers: int = 6,
        max_seq_len: int = 512,
        dropout: float = 0.1,
        **kwargs,
    ) -> None:
        self.vocab_size = vocab_size

        # Native field name on the left, Hugging Face alias on the right;
        # both attributes of a pair receive the same value.
        self.n_embd = self.hidden_size = n_embd
        self.n_heads = self.num_attention_heads = n_heads
        # Per the original author's note, HF generate() expects
        # num_hidden_layers to be present.
        self.n_layers = self.num_hidden_layers = n_layers
        self.max_seq_len = self.max_position_embeddings = max_seq_len

        self.dropout = dropout

        # The base initializer runs last, after every field above is set.
        super().__init__(**kwargs)