{
"dim": 1536,
"n_layers": 16,
"vocab_size": 151936,
"max_seq_len": 2048,
"n_heads": 24,
"n_kv_heads": 6,
"head_dim": 64,
"gdn_expand_v": 2,
"gdn_head_dim": 64,
"gdn_n_heads": 24,
"conv_kernel": 4,
"gdn_use_gate": true,
"gdn_use_short_conv": true,
"ffn_mult": 2.67,
"attn_interval": 4,
"use_x0_inject": true,
"use_resid_lambdas": true,
"use_skip_connections": true,
"use_diff_attn": false,
"rope_base": 10000.0,
"partial_rotary_factor": 0.25,
"n_bottom": 4,
"n_physical_top": 4,
"n_top_loops": 3,
"architecture": "Chimera",
"config_class": "ChimeraConfig",
"topology": "4 bottom + 4x3 top = 16 virtual",
"step": 249000,
"total_params": 510217280,
"size_label": "510M",
"model_type": "zara-ml"
}