sentinel-prime-350m / config.json
{
"architectures": [
"SentinelBrainForCausalLM"
],
"auto_map": {
"AutoConfig": "hf_model.SentinelBrainConfig",
"AutoModelForCausalLM": "hf_model.SentinelBrainForCausalLM"
},
"bos_token_id": null,
"d_ff": 2752,
"d_model": 1024,
"dropout": 0.0,
"dtype": "float32",
"eos_token_id": 100257,
"expert_capacity_factor": 1.25,
"expert_dropout": 0.0,
"hidden_size": 1024,
"max_seq_len": 2048,
"model_type": "sentinel_brain",
"n_active_experts": 1,
"n_experts": 1,
"n_heads": 16,
"n_kv_heads": 4,
"n_layers": 24,
"norm_eps": 1e-05,
"num_attention_heads": 16,
"num_hidden_layers": 24,
"pad_token_id": 100257,
"rope_theta": 500000.0,
"router_aux_loss_coeff": 0.01,
"router_z_loss_coeff": 0.001,
"routing_mode": "token_choice",
"tie_embeddings": false,
"tie_word_embeddings": false,
"transformers_version": "5.5.4",
"vocab_size": 100277
}
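
Because auto_map routes AutoConfig and AutoModelForCausalLM to custom classes defined in hf_model.py, loading this checkpoint through the Auto classes requires trust_remote_code=True. A minimal loading sketch follows; the repo id and the presence of hf_model.py in the repository are assumptions, not confirmed by this config:

# Minimal loading sketch.
# Assumptions: the repo id is "qubitpage/sentinel-prime-350m" (hypothetical) and the
# repository ships hf_model.py defining SentinelBrainConfig / SentinelBrainForCausalLM.
from transformers import AutoModelForCausalLM

repo_id = "qubitpage/sentinel-prime-350m"  # assumed repo id

# trust_remote_code=True lets transformers import the custom classes named in auto_map.
model = AutoModelForCausalLM.from_pretrained(repo_id, trust_remote_code=True)

# Sequences should respect the model's context limit from this config (max_seq_len: 2048),
# and generation should stop at eos_token_id 100257, which also serves as pad_token_id here.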