{
  "model_type": "afmoe",
  "architectures": [
    "MOE"
  ],
  "profile_name": "1b_3e_8l_t4x2",
  "vocab_size": 200024,
  "text_embed_dim": 1024,
  "vision_embed_dim": 1024,
  "hidden_dim": 1024,
  "ffn_dim": 6144,
  "num_layers": 8,
  "num_heads": 16,
  "num_kv_heads": 4,
  "num_experts": 3,
  "top_k": 2,
  "max_position_embeddings": 16384,
  "router_aux_loss_coef": 0.01,
  "share_experts_across_layers": false,
  "gradient_checkpointing": true,
  "num_agents": 4,
  "moe_capacity_factor": 1.0,
  "moe_hierarchy_groups": 1,
  "moe_hierarchy_top_k": 1,
  "num_shared_experts": 0,
  "load_balancing_mode": "aux_free",
  "router_bias_update_rate": 0.01,
  "kv_latent_dim": 128,
  "kv_cache_dtype": "int4",
  "rope_training_context": 16384,
  "rope_ntk_alpha": 1.0,
  "rope_yarn_scale": 1.0,
  "ring_attention_chunk_size": 0,
  "prefill_chunk_size": 256,
  "use_q_former_projector": true,
  "q_former_queries": 8,
  "q_former_layers": 1,
  "tokenizer_name": "ai-tokenizer:GPT-5"
}