{
  "model_type": "afmoe",
  "architectures": [
    "MOE"
  ],
  "profile_name": "1b_3e_8l_t4x2",
  "vocab_size": 200024,
  "text_embed_dim": 1024,
  "vision_embed_dim": 1024,
  "hidden_dim": 1024,
  "ffn_dim": 6144,
  "num_layers": 8,
  "num_heads": 16,
  "num_kv_heads": 4,
  "num_experts": 3,
  "top_k": 2,
  "max_position_embeddings": 16384,
  "router_aux_loss_coef": 0.01,
  "share_experts_across_layers": false,
  "gradient_checkpointing": true,
  "num_agents": 4,
  "moe_capacity_factor": 1.0,
  "moe_hierarchy_groups": 1,
  "moe_hierarchy_top_k": 1,
  "num_shared_experts": 0,
  "load_balancing_mode": "aux_free",
  "router_bias_update_rate": 0.01,
  "kv_latent_dim": 128,
  "kv_cache_dtype": "int4",
  "rope_training_context": 16384,
  "rope_ntk_alpha": 1.0,
  "rope_yarn_scale": 1.0,
  "ring_attention_chunk_size": 0,
  "prefill_chunk_size": 256,
  "use_q_former_projector": true,
  "q_former_queries": 8,
  "q_former_layers": 1,
  "tokenizer_name": "ai-tokenizer:GPT-5"
}