{
  "model_type": "seqcond",
  "architectures": [
    "SeqCondForCausalLM"
  ],
  "auto_map": {
    "AutoConfig": "configuration_seqcond.SeqCondConfig",
    "AutoModelForCausalLM": "modeling_seqcond.SeqCondForCausalLM",
    "AutoTokenizer": "tokenization_seqcond.SeqCondTokenizer"
  },
  "transformers_version": "5.3.0",
  "d_model": 1024,
  "d_ff": 2730,
  "num_layers": 24,
  "vocab_size": 100300,
  "maxlen": 4096,
  "num_heads": 16,
  "num_kv_heads": 4,
  "qk_norm": true,
  "qk_norm_eps": 1e-06,
  "seqcond_heads": 16,
  "num_query_heads": 16,
  "num_thetas": 2,
  "conv_kernel_size": 4,
  "expand_factor": 2.0,
  "out_expand_factor": 3,
  "seqcond_ratio": 2,
  "skip_low_rank": false,
  "num_anchor_heads": 0,
  "eos_token_id": 100279,
  "pad_token_id": 100279,
  "bos_token_id": null
}