OsaurusAI
/

MiniMax-M2.7-JANGTQ_K

Text Generation

jangtq-prestack

mixed-precision

Mixture of Experts

Model card Files Files and versions

Osaurus-AI committed 2 days ago

Commit

8716d6a

·

verified ·

1 Parent(s): 552b81e

Upload config.json with huggingface_hub

Files changed (1) hide show

config.json +136 -0

config.json ADDED Viewed

	@@ -0,0 +1,136 @@

+{
+  "architectures": [
+    "MiniMaxM2ForCausalLM"
+  ],
+  "attn_type_list": [
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1
+  ],
+  "auto_map": {
+    "AutoConfig": "configuration_minimax_m2.MiniMaxM2Config",
+    "AutoModelForCausalLM": "modeling_minimax_m2.MiniMaxM2ForCausalLM"
+  },
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 3072,
+  "intermediate_size": 1536,
+  "max_position_embeddings": 196608,
+  "model_type": "minimax_m2",
+  "mtp_transformer_layers": 0,
+  "num_attention_heads": 48,
+  "num_experts_per_tok": 8,
+  "num_hidden_layers": 62,
+  "num_key_value_heads": 8,
+  "num_local_experts": 256,
+  "num_mtp_modules": 0,
+  "qk_norm_type": "per_layer",
+  "rms_norm_eps": 1e-06,
+  "rope_theta": 5000000,
+  "rotary_dim": 64,
+  "scoring_func": "sigmoid",
+  "shared_intermediate_size": 0,
+  "tie_word_embeddings": false,
+  "transformers_version": "4.46.1",
+  "use_cache": true,
+  "use_mtp": false,
+  "use_qk_norm": true,
+  "use_routing_bias": true,
+  "vocab_size": 200064,
+  "_name_or_path": "M2.5-SLURPY",
+  "quantization": {
+    "bits": 8,
+    "mode": "affine",
+    "group_size": 64,
+    "routed_expert_bits": {
+      "gate_proj": 2,
+      "down_proj": 4,
+      "up_proj": 2
+    },
+    "mxtq_bits": {
+      "routed_expert": {
+        "gate_proj": 2,
+        "down_proj": 4,
+        "up_proj": 2
+      },
+      "attention": 8,
+      "shared_expert": 8,
+      "embed_tokens": 8,
+      "lm_head": 8,
+      "norms_router_biases": 16
+    }
+  },
+  "mxtq_bits": {
+    "routed_expert": {
+      "gate_proj": 2,
+      "down_proj": 4,
+      "up_proj": 2
+    },
+    "attention": 8,
+    "shared_expert": 8,
+    "embed_tokens": 8,
+    "lm_head": 8,
+    "norms_router_biases": 16
+  },
+  "weight_format": "mxtq"
+}