harshad317's picture
Uploading the base model
2ca81ec verified
{
"sequence_len": 8192,
"vocab_size": 131072,
"n_layer": 16,
"n_head": 8,
"n_kv_head": 8,
"n_embd": 1024,
"moe_num_experts": 8,
"moe_top_k": 2,
"moe_layer_interval": 3,
"moe_group_size": 4,
"moe_expert_intermediate_size": 1792,
"moe_adjugate_intermediate_size": 0,
"moe_adjugate_scale": 0.05,
"moe_router_aux_loss_coef": 0.015,
"moe_router_bias_lr": 0.001,
"moe_activation_checkpoint": true,
"moe_capacity_factor": 0.75,
"rotary_scaling_type": "yarn",
"rotary_scale_factor": 4.0,
"residual_scale": -1.0,
"attn_dropout": 0.01,
"label_smoothing": 0.0,
"z_loss_weight": 0.0,
"use_flash_attention": true,
"domain_router_dim": 32,
"num_domain_tags": 128,
"domain_router_features": {
"dataset": {
"capacity": 128,
"mode": "one_hot"
},
"quality": {
"capacity": 32,
"mode": "one_hot"
},
"specialty": {
"capacity": 64,
"mode": "one_hot"
},
"modality": {
"capacity": 32,
"mode": "one_hot"
},
"language": {
"capacity": 32,
"mode": "one_hot"
},
"origin": {
"capacity": 8,
"mode": "one_hot"
}
}
}