Update README.md
#3
by zheyishine - opened
README.md
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
-
---
|
| 2 |
-
license: mit
|
| 3 |
-
language:
|
| 4 |
-
- en
|
| 5 |
-
---
|
| 6 |
## Ling-2.6-flash: Faster Responses, Stronger Execution, Higher Token Efficiency
|
| 7 |
### Introduction
|
| 8 |
Today, we announce the official open-source release of **Ling-2.6-flash**, an **instruct model** with **104B total parameters** and **7.4B active parameters**.
|
|
@@ -95,7 +95,7 @@ python -m sglang.launch_server \
|
|
| 95 |
--dp-size 1 \
|
| 96 |
--trust-remote-code \
|
| 97 |
--tool-call-parser qwen25 \
|
| 98 |
-
--json-model-override-args '{"
|
| 99 |
--dist-init-addr $MASTER_IP:2345 \
|
| 100 |
--port $PORT \
|
| 101 |
--nnodes 1
|
|
@@ -119,6 +119,7 @@ python -m sglang.launch_server \
|
|
| 119 |
--tp-size 4 \
|
| 120 |
--pp-size 1 \
|
| 121 |
--dp-size 1 \
|
|
|
|
| 122 |
--speculative-algorithm NEXTN \
|
| 123 |
--speculative-num-steps 1 \
|
| 124 |
--speculative-eagle-topk 1 \
|
|
@@ -127,7 +128,7 @@ python -m sglang.launch_server \
|
|
| 127 |
--max-running-requests 64 \
|
| 128 |
--max-mamba-cache-size 256 \
|
| 129 |
--tool-call-parser qwen25 \
|
| 130 |
-
--json-model-override-args '{"
|
| 131 |
--trust-remote-code \
|
| 132 |
--dist-init-addr $MASTER_IP:2345 \
|
| 133 |
--port $PORT \
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: mit
|
| 3 |
+
language:
|
| 4 |
+
- en
|
| 5 |
+
---
|
| 6 |
## Ling-2.6-flash: Faster Responses, Stronger Execution, Higher Token Efficiency
|
| 7 |
### Introduction
|
| 8 |
Today, we announce the official open-source release of **Ling-2.6-flash**, an **instruct model** with **104B total parameters** and **7.4B active parameters**.
|
|
|
|
| 95 |
--dp-size 1 \
|
| 96 |
--trust-remote-code \
|
| 97 |
--tool-call-parser qwen25 \
|
| 98 |
+
--json-model-override-args '{"rope_scaling": {"rope_type": "yarn", "factor": 2.0, "rope_theta": 6000000, "partial_rotary_factor": 0.5, "original_max_position_embeddings": 131072}}' \
|
| 99 |
--dist-init-addr $MASTER_IP:2345 \
|
| 100 |
--port $PORT \
|
| 101 |
--nnodes 1
|
|
|
|
| 119 |
--tp-size 4 \
|
| 120 |
--pp-size 1 \
|
| 121 |
--dp-size 1 \
|
| 122 |
+
--mamba-scheduler-strategy extra_buffer \
|
| 123 |
--speculative-algorithm NEXTN \
|
| 124 |
--speculative-num-steps 1 \
|
| 125 |
--speculative-eagle-topk 1 \
|
|
|
|
| 128 |
--max-running-requests 64 \
|
| 129 |
--max-mamba-cache-size 256 \
|
| 130 |
--tool-call-parser qwen25 \
|
| 131 |
+
--json-model-override-args '{"rope_scaling": {"rope_type": "yarn", "factor": 2.0, "rope_theta": 6000000, "partial_rotary_factor": 0.5, "original_max_position_embeddings": 131072}}' \
|
| 132 |
--trust-remote-code \
|
| 133 |
--dist-init-addr $MASTER_IP:2345 \
|
| 134 |
--port $PORT \
|