Auron-279M / config.json
nyxia's picture
Upload Chimera 279M at step 250000
074fbc0 verified
raw
history blame contribute delete
756 Bytes
{
"dim": 1024,
"n_layers": 16,
"vocab_size": 151936,
"max_seq_len": 2048,
"n_heads": 16,
"n_kv_heads": 4,
"head_dim": 64,
"gdn_expand_v": 2,
"gdn_head_dim": 64,
"gdn_n_heads": 16,
"conv_kernel": 4,
"gdn_use_gate": true,
"gdn_use_short_conv": true,
"ffn_mult": 2.67,
"attn_interval": 4,
"use_x0_inject": true,
"use_resid_lambdas": true,
"use_skip_connections": true,
"use_diff_attn": false,
"rope_base": 10000.0,
"partial_rotary_factor": 0.25,
"n_bottom": 4,
"n_physical_top": 4,
"n_top_loops": 3,
"architecture": "Chimera",
"config_class": "ChimeraConfig",
"topology": "4 bottom + 4x3 top = 16 virtual",
"step": 250000,
"total_params": 278664160,
"size_label": "279M",
"model_type": "zara-ml"
}