{
"vocab_size": 50257,
"max_seq_len": 2048,
"d_model": 1024,
"n_layers": 10,
"n_heads": 16,
"ff_mult": 4,
"dropout": 0.1,
"recurse_steps": 6,
"critique_threshold": 0.2,
"tie_embeddings": true,
"use_moe": true,
"moe_num_experts": 32,
"moe_top_k": 1,
"moe_expert_hidden": 1280,
"moe_router_jitter": 0.01,
"moe_aux_loss_weight": 0.01,
"use_layer_skip": true,
"layer_skip_threshold": 0.8,
"layer_skip_target": 0.03,
"layer_skip_aux_weight": 0.01,
"use_ternary_weights": true,
"use_flash_attention": true,
"use_fused_ops": true,
"packed_execution": true,
"use_torch_compile": false,
"moe_backend": "auto",
"moe_ep_size": 1
}