{ "dim": 1024, "n_layers": 16, "vocab_size": 151936, "max_seq_len": 2048, "n_heads": 16, "n_kv_heads": 4, "head_dim": 64, "gdn_expand_v": 2, "gdn_head_dim": 64, "gdn_n_heads": 16, "conv_kernel": 4, "gdn_use_gate": true, "gdn_use_short_conv": true, "ffn_mult": 2.67, "attn_interval": 4, "use_x0_inject": true, "use_resid_lambdas": true, "use_skip_connections": true, "use_diff_attn": false, "rope_base": 10000.0, "partial_rotary_factor": 0.25, "n_bottom": 4, "n_physical_top": 4, "n_top_loops": 3, "architecture": "Chimera", "config_class": "ChimeraConfig", "topology": "4 bottom + 4x3 top = 16 virtual", "step": 250000, "total_params": 278664160, "size_label": "279M", "model_type": "zara-ml" }