Update README.md
Browse files
README.md
CHANGED
|
@@ -60,7 +60,7 @@ Whether the workload involves **long-context understanding** or **extended text
|
|
| 60 |
</div>
|
| 61 |
|
| 62 |
### Quickstart
|
| 63 |
-
#### SGLang
|
| 64 |
##### Environment Preparation
|
| 65 |
```bash
|
| 66 |
pip install uv
|
|
@@ -70,7 +70,7 @@ uv venv ~/my_ling_env
|
|
| 70 |
source ~/my_ling_env/bin/activate
|
| 71 |
|
| 72 |
# uv pip install "sglang-kernel>=0.4.1"
|
| 73 |
-
uv pip install "sglang>=0.5.10.post1"
|
| 74 |
```
|
| 75 |
|
| 76 |
##### Run Inference
|
|
@@ -86,6 +86,8 @@ python -m sglang.launch_server \
|
|
| 86 |
--pp-size 1 \
|
| 87 |
--dp-size 1 \
|
| 88 |
--trust-remote-code \
|
|
|
|
|
|
|
| 89 |
--dist-init-addr $MASTER_IP:2345 \
|
| 90 |
--port $PORT \
|
| 91 |
--nnodes 1
|
|
@@ -116,6 +118,8 @@ python -m sglang.launch_server \
|
|
| 116 |
--mem-fraction-static 0.75 \
|
| 117 |
--max-running-requests 64 \
|
| 118 |
--max-mamba-cache-size 256 \
|
|
|
|
|
|
|
| 119 |
--trust-remote-code \
|
| 120 |
--dist-init-addr $MASTER_IP:2345 \
|
| 121 |
--port $PORT \
|
|
|
|
| 60 |
</div>
|
| 61 |
|
| 62 |
### Quickstart
|
| 63 |
+
#### SGLang (Recommended)
|
| 64 |
##### Environment Preparation
|
| 65 |
```bash
|
| 66 |
pip install uv
|
|
|
|
| 70 |
source ~/my_ling_env/bin/activate
|
| 71 |
|
| 72 |
# uv pip install "sglang-kernel>=0.4.1"
|
| 73 |
+
uv pip install "sglang[all]>=0.5.10.post1" --prerelease=allow
|
| 74 |
```
|
| 75 |
|
| 76 |
##### Run Inference
|
|
|
|
| 86 |
--pp-size 1 \
|
| 87 |
--dp-size 1 \
|
| 88 |
--trust-remote-code \
|
| 89 |
+
--tool-call-parser qwen25 \
|
| 90 |
+
--json-model-override-args '{"linear_backend": "seg_la", "torch_dtype": "bfloat16", "architectures": ["BailingMoeV2_5ForCausalLM"], "model_type": "bailing_hybrid", "rope_scaling": {"rope_type": "yarn", "factor": 2.0, "rope_theta": 6000000, "partial_rotary_factor": 0.5, "original_max_position_embeddings": 131072}}' \
|
| 91 |
--dist-init-addr $MASTER_IP:2345 \
|
| 92 |
--port $PORT \
|
| 93 |
--nnodes 1
|
|
|
|
| 118 |
--mem-fraction-static 0.75 \
|
| 119 |
--max-running-requests 64 \
|
| 120 |
--max-mamba-cache-size 256 \
|
| 121 |
+
--tool-call-parser qwen25 \
|
| 122 |
+
--json-model-override-args '{"linear_backend": "seg_la", "torch_dtype": "bfloat16", "architectures": ["BailingMoeV2_5ForCausalLM"], "model_type": "bailing_hybrid", "rope_scaling": {"rope_type": "yarn", "factor": 2.0, "rope_theta": 6000000, "partial_rotary_factor": 0.5, "original_max_position_embeddings": 131072}}' \
|
| 123 |
--trust-remote-code \
|
| 124 |
--dist-init-addr $MASTER_IP:2345 \
|
| 125 |
--port $PORT \
|