caizhi1 zheyishine committed on
Commit
1089cfa
·
1 Parent(s): 180c00f

Update README.md (#1)

Browse files

- Update README.md (5059a36cefbd8b4975224d06b2cd6885a4b37005)
- Update README.md (b62a2f5997b6c8ee890abaf281924d7d05101b64)


Co-authored-by: Yao Zhao <zheyishine@users.noreply.huggingface.co>

Files changed (1) hide show
  1. README.md +5 -2
README.md CHANGED
@@ -86,8 +86,9 @@ python -m sglang.launch_server \
86
  --pp-size 1 \
87
  --dp-size 1 \
88
  --trust-remote-code \
 
89
  --tool-call-parser qwen25 \
90
- --json-model-override-args '{"linear_backend": "seg_la", "torch_dtype": "bfloat16", "architectures": ["BailingMoeV2_5ForCausalLM"], "model_type": "bailing_hybrid", "rope_scaling": {"rope_type": "yarn", "factor": 2.0, "rope_theta": 6000000, "partial_rotary_factor": 0.5, "original_max_position_embeddings": 131072}}' \
91
  --dist-init-addr $MASTER_IP:2345 \
92
  --port $PORT \
93
  --nnodes 1
@@ -111,6 +112,8 @@ python -m sglang.launch_server \
111
  --tp-size 4 \
112
  --pp-size 1 \
113
  --dp-size 1 \
 
 
114
  --speculative-algorithm NEXTN \
115
  --speculative-num-steps 3 \
116
  --speculative-eagle-topk 1 \
@@ -119,7 +122,7 @@ python -m sglang.launch_server \
119
  --max-running-requests 64 \
120
  --max-mamba-cache-size 256 \
121
  --tool-call-parser qwen25 \
122
- --json-model-override-args '{"linear_backend": "seg_la", "torch_dtype": "bfloat16", "architectures": ["BailingMoeV2_5ForCausalLM"], "model_type": "bailing_hybrid", "rope_scaling": {"rope_type": "yarn", "factor": 2.0, "rope_theta": 6000000, "partial_rotary_factor": 0.5, "original_max_position_embeddings": 131072}}' \
123
  --trust-remote-code \
124
  --dist-init-addr $MASTER_IP:2345 \
125
  --port $PORT \
 
86
  --pp-size 1 \
87
  --dp-size 1 \
88
  --trust-remote-code \
89
+ --context-length 262144 \
90
  --tool-call-parser qwen25 \
91
+ --json-model-override-args '{"rope_scaling": {"rope_type": "yarn", "factor": 2.0, "rope_theta": 6000000, "partial_rotary_factor": 0.5, "original_max_position_embeddings": 131072}}' \
92
  --dist-init-addr $MASTER_IP:2345 \
93
  --port $PORT \
94
  --nnodes 1
 
112
  --tp-size 4 \
113
  --pp-size 1 \
114
  --dp-size 1 \
115
+ --context-length 262144 \
116
+ --mamba-scheduler-strategy extra_buffer \
117
  --speculative-algorithm NEXTN \
118
  --speculative-num-steps 3 \
119
  --speculative-eagle-topk 1 \
 
122
  --max-running-requests 64 \
123
  --max-mamba-cache-size 256 \
124
  --tool-call-parser qwen25 \
125
+ --json-model-override-args '{"rope_scaling": {"rope_type": "yarn", "factor": 2.0, "rope_theta": 6000000, "partial_rotary_factor": 0.5, "original_max_position_embeddings": 131072}}' \
126
  --trust-remote-code \
127
  --dist-init-addr $MASTER_IP:2345 \
128
  --port $PORT \