caizhi1 committed on
Commit
349c475
·
verified ·
1 Parent(s): 5e2e8d3

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +6 -2
README.md CHANGED
@@ -60,7 +60,7 @@ Whether the workload involves **long-context understanding** or **extended text
60
  </div>
61
 
62
  ### Quickstart
63
- #### SGLang
64
  ##### Environment Preparation
65
  ```bash
66
  pip install uv
@@ -70,7 +70,7 @@ uv venv ~/my_ling_env
70
  source ~/my_ling_env/bin/activate
71
 
72
  # uv pip install "sglang-kernel>=0.4.1"
73
- uv pip install "sglang>=0.5.10.post1"
74
  ```
75
 
76
  ##### Run Inference
@@ -86,6 +86,8 @@ python -m sglang.launch_server \
86
  --pp-size 1 \
87
  --dp-size 1 \
88
  --trust-remote-code \
 
 
89
  --dist-init-addr $MASTER_IP:2345 \
90
  --port $PORT \
91
  --nnodes 1
@@ -116,6 +118,8 @@ python -m sglang.launch_server \
116
  --mem-fraction-static 0.75 \
117
  --max-running-requests 64 \
118
  --max-mamba-cache-size 256 \
 
 
119
  --trust-remote-code \
120
  --dist-init-addr $MASTER_IP:2345 \
121
  --port $PORT \
 
60
  </div>
61
 
62
  ### Quickstart
63
+ #### SGLang (Recommended)
64
  ##### Environment Preparation
65
  ```bash
66
  pip install uv
 
70
  source ~/my_ling_env/bin/activate
71
 
72
  # uv pip install "sglang-kernel>=0.4.1"
73
+ uv pip install "sglang[all]>=0.5.10.post1" --prerelease=allow
74
  ```
75
 
76
  ##### Run Inference
 
86
  --pp-size 1 \
87
  --dp-size 1 \
88
  --trust-remote-code \
89
+ --tool-call-parser qwen25 \
90
+ --json-model-override-args '{"linear_backend": "seg_la", "torch_dtype": "bfloat16", "architectures": ["BailingMoeV2_5ForCausalLM"], "model_type": "bailing_hybrid", "rope_scaling": {"rope_type": "yarn", "factor": 2.0, "rope_theta": 6000000, "partial_rotary_factor": 0.5, "original_max_position_embeddings": 131072}}' \
91
  --dist-init-addr $MASTER_IP:2345 \
92
  --port $PORT \
93
  --nnodes 1
 
118
  --mem-fraction-static 0.75 \
119
  --max-running-requests 64 \
120
  --max-mamba-cache-size 256 \
121
+ --tool-call-parser qwen25 \
122
+ --json-model-override-args '{"linear_backend": "seg_la", "torch_dtype": "bfloat16", "architectures": ["BailingMoeV2_5ForCausalLM"], "model_type": "bailing_hybrid", "rope_scaling": {"rope_type": "yarn", "factor": 2.0, "rope_theta": 6000000, "partial_rotary_factor": 0.5, "original_max_position_embeddings": 131072}}' \
123
  --trust-remote-code \
124
  --dist-init-addr $MASTER_IP:2345 \
125
  --port $PORT \