{
  "arch_layout": ["m4", ["T24"], "m4"],
  "d_model": [1024, 2048],
  "d_intermediate": [0, 5504],
  "vocab_size": 256,
  "ssm_cfg": {
    "chunk_size": 256,
    "d_conv": 4,
    "d_state": 128,
    "expand": 2
  },
  "attn_cfg": {
    "num_heads": [16, 16],
    "rotary_emb_dim": [32, 64],
    "window_size": [1023, -1]
  },
  "tie_embeddings": false
}