```json
{
  "dim": 1536,
  "n_layers": 16,
  "vocab_size": 151936,
  "max_seq_len": 2048,
  "n_heads": 24,
  "n_kv_heads": 6,
  "head_dim": 64,
  "gdn_expand_v": 2,
  "gdn_head_dim": 64,
  "gdn_n_heads": 24,
  "conv_kernel": 4,
  "gdn_use_gate": true,
  "gdn_use_short_conv": true,
  "ffn_mult": 2.67,
  "attn_interval": 4,
  "use_x0_inject": true,
  "use_resid_lambdas": true,
  "use_skip_connections": true,
  "use_diff_attn": false,
  "rope_base": 10000.0,
  "partial_rotary_factor": 0.25,
  "n_bottom": 4,
  "n_physical_top": 4,
  "n_top_loops": 3,
  "architecture": "Chimera",
  "config_class": "ChimeraConfig",
  "topology": "4 bottom + 4x3 top = 16 virtual",
  "step": 249000,
  "total_params": 510217280,
  "size_label": "510M",
  "model_type": "zara-ml"
}
```
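The `topology` string summarizes how the 16 virtual layers in `n_layers` arise from only 8 physical blocks: `n_bottom` (4) layers run once, then the `n_physical_top` (4) layers are re-entered `n_top_loops` (3) times with shared weights, giving 4 + 4×3 = 16. Below is a minimal Python sketch of that schedule, assuming the common convention that every `attn_interval`-th virtual layer uses full attention and the rest use the gated-delta-net (GDN) mixer; the function name `virtual_schedule` and the exact attention-placement rule are illustrative assumptions, not the actual `ChimeraConfig` implementation.

```python
# Sketch of the virtual-layer schedule implied by the config above.
# Assumptions (not confirmed by the source): attention is placed on
# every ATTN_INTERVAL-th virtual layer; all other layers use GDN mixing.

N_BOTTOM = 4        # "n_bottom": bottom layers, executed once each
N_PHYSICAL_TOP = 4  # "n_physical_top": top layers with shared weights
N_TOP_LOOPS = 3     # "n_top_loops": times the top stack is re-entered
ATTN_INTERVAL = 4   # "attn_interval"

def virtual_schedule():
    """Map each virtual layer index to the physical layer it executes."""
    schedule = list(range(N_BOTTOM))  # bottom layers run once
    for _ in range(N_TOP_LOOPS):      # top layers loop with weight sharing
        schedule.extend(N_BOTTOM + i for i in range(N_PHYSICAL_TOP))
    return schedule

if __name__ == "__main__":
    sched = virtual_schedule()
    assert len(sched) == 16  # matches "n_layers" and the "topology" string
    for v, phys in enumerate(sched):
        mixer = "attention" if (v + 1) % ATTN_INTERVAL == 0 else "gdn"
        print(f"virtual layer {v:2d} -> physical layer {phys} [{mixer}]")
```

The remaining fields are internally consistent with this reading: `n_heads` × `head_dim` = 24 × 64 = 1536 = `dim`, and `n_kv_heads` = 6 gives 4-way grouped-query attention on the attention layers.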