from typing import Optional

from transformers import PretrainedConfig


class transformerConfig(PretrainedConfig):
    """Configuration for a custom encoder-decoder Transformer.

    Stores the model hyperparameters (vocabulary sizes, hidden and
    feed-forward dimensions, attention head settings) together with the
    training hyperparameters (batch size, sequence length, learning
    rate, number of epochs).
    """

    model_type = "custom_transformer"

    def __init__(
        self,
        src_vocab_len: int = 184,      # source vocabulary size
        tgt_vocab: int = 201,          # target vocabulary size
        num_hiddens: int = 32,         # model / embedding dimension
        num_layers: int = 2,           # encoder and decoder blocks
        dropout: float = 0.1,
        batch_size: int = 64,
        num_steps: int = 10,           # sequence length in tokens
        lr: float = 0.005,
        num_epochs: int = 200,
        ffn_num_input: int = 32,       # feed-forward network input dim
        ffn_num_hiddens: int = 64,     # feed-forward network hidden dim
        num_heads: int = 4,            # attention heads
        key_size: int = 32,
        query_size: int = 32,
        value_size: int = 32,
        norm_shape: Optional[list] = None,  # LayerNorm normalized_shape
        **kwargs,
    ):
        self.src_vocab_len = src_vocab_len
        self.tgt_vocab = tgt_vocab
        self.num_hiddens = num_hiddens
        self.num_layers = num_layers
        self.dropout = dropout
        self.batch_size = batch_size
        self.num_steps = num_steps
        self.lr = lr
        self.num_epochs = num_epochs
        self.ffn_num_input = ffn_num_input
        self.ffn_num_hiddens = ffn_num_hiddens
        self.num_heads = num_heads
        self.key_size = key_size
        self.query_size = query_size
        self.value_size = value_size
        # Avoid a shared mutable default argument: fall back to [32]
        # (the original default) when the caller passes nothing.
        self.norm_shape = norm_shape if norm_shape is not None else [32]
        super().__init__(**kwargs)
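

# A minimal usage sketch, not part of the class itself. The directory name
# below is an assumption for illustration; save_pretrained, from_pretrained,
# and AutoConfig.register are standard transformers APIs. Registering the
# class is only needed if you want AutoConfig to resolve the
# "custom_transformer" model_type on its own.
if __name__ == "__main__":
    from transformers import AutoConfig

    # Instantiate with a couple of non-default hyperparameters.
    config = transformerConfig(num_layers=4, dropout=0.2)
    config.save_pretrained("./custom_transformer_config")  # writes config.json

    # Reload directly through the custom class.
    reloaded = transformerConfig.from_pretrained("./custom_transformer_config")
    assert reloaded.num_layers == 4

    # Optionally map the model_type to this class for AutoConfig lookups.
    AutoConfig.register("custom_transformer", transformerConfig)
    auto_loaded = AutoConfig.from_pretrained("./custom_transformer_config")
    print(type(auto_loaded).__name__, auto_loaded.num_hiddens)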