dylanlanigan committed on
Commit 79f8c7e · verified · 1 Parent(s): 138f82e

Add files using upload-large-folder tool

Files changed (2):
  1. mlc-chat-config.json +9 -2
  2. mlc-chat-config.json.base +91 -0
mlc-chat-config.json CHANGED
@@ -12,9 +12,16 @@
     "num_key_value_heads": 4,
     "rms_norm_eps": 1e-06,
     "rope_theta": 1000000.0,
+    "rope_scaling": {
+      "factor": 4,
+      "mscale": 0.707,
+      "mscale_all_dim": 0.707,
+      "original_max_position_embeddings": 32768,
+      "type": "yarn"
+    },
     "vocab_size": 151936,
     "tie_word_embeddings": false,
-    "context_window_size": 40960,
+    "context_window_size": 131072,
     "prefill_chunk_size": 2048,
     "tensor_parallel_shards": 1,
     "head_dim": 128,
@@ -28,7 +35,7 @@
     "norm_topk_prob": true
   },
   "vocab_size": 151936,
-  "context_window_size": 40960,
+  "context_window_size": 131072,
   "sliding_window_size": -1,
   "prefill_chunk_size": 2048,
   "attention_sink_size": -1,
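The `rope_scaling` block added above switches the model to YaRN position scaling: the advertised window grows from 40960 to factor × original_max_position_embeddings = 4 × 32768 = 131072 tokens, which matches the new `context_window_size`. The snippet below is a minimal NumPy sketch of how YaRN-style scaling typically stretches the RoPE inverse frequencies under these values; the `beta_fast`/`beta_slow` ramp bounds are the usual defaults and an assumption here (they do not appear in this config), so treat it as an illustration rather than MLC LLM's actual implementation.

```python
# Minimal sketch of YaRN-style RoPE frequency scaling using the values
# from this diff. beta_fast/beta_slow are assumed defaults, not config keys.
import numpy as np

head_dim = 128                # "head_dim"
rope_theta = 1_000_000.0      # "rope_theta"
factor = 4                    # "rope_scaling.factor"
orig_max = 32_768             # "original_max_position_embeddings"
beta_fast, beta_slow = 32, 1  # assumed YaRN defaults

# Plain RoPE inverse frequencies, one per rotated dimension pair.
inv_freq = 1.0 / rope_theta ** (np.arange(0, head_dim, 2) / head_dim)

# Dimension index at which a frequency completes n_rot full rotations over
# the original context window (the standard YaRN correction formula).
def correction_dim(n_rot: float) -> float:
    return (head_dim * np.log(orig_max / (n_rot * 2 * np.pi))) / (2 * np.log(rope_theta))

low = max(int(np.floor(correction_dim(beta_fast))), 0)
high = min(int(np.ceil(correction_dim(beta_slow))), head_dim // 2 - 1)

# Ramp is 0 below `low` (fast dims: keep original frequencies) and 1 above
# `high` (slow dims: interpolate by 1/factor), with a linear blend between.
ramp = np.clip((np.arange(head_dim // 2) - low) / max(high - low, 1), 0.0, 1.0)
yarn_inv_freq = inv_freq * (1 - ramp) + (inv_freq / factor) * ramp

# The advertised window is simply factor * original window:
assert factor * orig_max == 131072  # the new "context_window_size"
```

The `mscale`/`mscale_all_dim` entries (0.707 ≈ 1/√2) control YaRN's attention-logit rescaling and are not modeled in this sketch.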
mlc-chat-config.json.base ADDED
@@ -0,0 +1,91 @@
+{
+  "version": "0.1.0",
+  "model_type": "qwen3_moe",
+  "quantization": "q4f16_1",
+  "model_config": {
+    "hidden_act": "silu",
+    "hidden_size": 2048,
+    "intermediate_size": 6144,
+    "attention_bias": false,
+    "num_attention_heads": 32,
+    "num_hidden_layers": 48,
+    "num_key_value_heads": 4,
+    "rms_norm_eps": 1e-06,
+    "rope_theta": 1000000.0,
+    "vocab_size": 151936,
+    "tie_word_embeddings": false,
+    "context_window_size": 40960,
+    "prefill_chunk_size": 2048,
+    "tensor_parallel_shards": 1,
+    "head_dim": 128,
+    "dtype": "float32",
+    "max_batch_size": 128,
+    "weight_block_size": null,
+    "moe_intermediate_size": 768,
+    "num_experts_per_tok": 8,
+    "num_experts": 128,
+    "decoder_sparse_step": 1,
+    "norm_topk_prob": true
+  },
+  "vocab_size": 151936,
+  "context_window_size": 40960,
+  "sliding_window_size": -1,
+  "prefill_chunk_size": 2048,
+  "attention_sink_size": -1,
+  "tensor_parallel_shards": 1,
+  "pipeline_parallel_stages": 1,
+  "temperature": 0.6,
+  "presence_penalty": 0.0,
+  "frequency_penalty": 0.0,
+  "repetition_penalty": 1.0,
+  "top_p": 0.95,
+  "tokenizer_files": [
+    "tokenizer.json",
+    "vocab.json",
+    "merges.txt",
+    "tokenizer_config.json"
+  ],
+  "tokenizer_info": {
+    "token_postproc_method": "byte_level",
+    "prepend_space_in_encode": false,
+    "strip_space_in_decode": false
+  },
+  "conv_template": {
+    "name": "qwen2",
+    "system_template": "<|im_start|>system\n{system_message}<|im_end|>\n",
+    "system_message": "You are a helpful assistant.",
+    "system_prefix_token_ids": null,
+    "add_role_after_system_message": true,
+    "roles": {
+      "user": "<|im_start|>user",
+      "assistant": "<|im_start|>assistant"
+    },
+    "role_templates": {
+      "user": "{user_message}",
+      "assistant": "{assistant_message}",
+      "tool": "{tool_message}"
+    },
+    "messages": [],
+    "seps": [
+      "<|im_end|>\n"
+    ],
+    "role_content_sep": "\n",
+    "role_empty_sep": "\n",
+    "stop_str": [
+      "<|endoftext|>",
+      "<|im_end|>"
+    ],
+    "stop_token_ids": [
+      151643,
+      151645
+    ],
+    "function_string": "",
+    "use_function_calling": false
+  },
+  "pad_token_id": 151643,
+  "bos_token_id": 151643,
+  "eos_token_id": [
+    151645,
+    151643
+  ]
+}
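For reference, MLC LLM reads `mlc-chat-config.json` when the model is loaded, so the generation defaults above (temperature 0.6, top_p 0.95, Qwen-style stop tokens) apply automatically. A minimal usage sketch with the `mlc_llm` Python package follows; the model path is hypothetical, so point it at whichever local directory or HF repo holds this config and the q4f16_1 weights.

```python
# Minimal chat loop with MLC LLM's documented Python entry point.
from mlc_llm import MLCEngine

model = "./dist/Qwen3-MoE-q4f16_1-MLC"  # hypothetical path to these files
engine = MLCEngine(model)

# Sampling defaults (temperature=0.6, top_p=0.95) and the qwen2 conversation
# template come from the mlc-chat-config.json shown above.
for response in engine.chat.completions.create(
    messages=[{"role": "user", "content": "Summarize YaRN in one sentence."}],
    model=model,
    stream=True,
):
    for choice in response.choices:
        print(choice.delta.content or "", end="", flush=True)
print()

engine.terminate()
```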