{
  "vocab_size": 50257,
  "emb_dim": 1536,
  "hidden_dim": 6144,
  "num_layers": 24,
  "num_heads": 16,
  "num_kv_heads": 8,
  "max_seq_len": 1024,
  "window_size": 1024,
  "sliding_window_ratio": 0.75,
  "rope_theta": 10000.0,
  "dtype": "torch.float16",
  "bias": false,
  "dropout": 0.0
}