Dmitry Beresnev committed on
Commit
2c31416
·
1 Parent(s): e8080f5

change timeouts

Browse files
Files changed (1) hide show
  1. Dockerfile +3 -0
Dockerfile CHANGED
@@ -145,6 +145,9 @@ ENV DEFAULT_MODEL=QuantFactory/Qwen2.5-7B-Instruct-GGUF:q4_k_m \
145
  MANAGER_PORT=7860 \
146
  WORKER_BASE_PORT=8080 \
147
  SWITCH_TIMEOUT_SEC=300 \
 
 
 
148
  MODEL_N_CTX=8192 \
149
  MODEL_THREADS=4 \
150
  MODEL_NGL=0 \
 
145
  MANAGER_PORT=7860 \
146
  WORKER_BASE_PORT=8080 \
147
  SWITCH_TIMEOUT_SEC=300 \
148
+ REQUEST_TIMEOUT_SEC=300 \
149
+ DEFAULT_MAX_TOKENS=2048 \
150
+ MAX_TOKENS_PER_REQUEST=4096 \
151
  MODEL_N_CTX=8192 \
152
  MODEL_THREADS=4 \
153
  MODEL_NGL=0 \