# MTP Mini Configuration
model:
  vocab_size: 8000
  d_model: 256
  n_layers: 4
  n_heads: 4
  d_ff: 1024
  max_seq_len: 128
  dropout: 0.1
training:
  batch_size: 4
  epochs: 20
  learning_rate: 0.0003
  weight_decay: 0.01
  max_grad_norm: 1.0
  num_threads: 4
  save_every: 5
data:
  corpus_path: corpus/mtp_mini_corpus.jsonl