Run command that works
#2
by RamboRogers - opened
The following command works on an RTX 3090:
# Launch llama-server with the Q4_K_M GGUF of Qwen3.5-27B-heretic-v3, with only
# CUDA device 0 visible to the process. Options are given in their long form:
#   --hf-repo / --hf-file    Hugging Face repo and the GGUF file to pull from it
#   --n-gpu-layers 99        number of model layers to offload to the GPU
#   --ctx-size 65536         context size, in tokens
#   --flash-attn on          enable flash attention
#   --cache-type-k / -v q4_0 KV cache data type for K and V
#   --split-mode none        do not split the model across GPUs
#   --main-gpu 0             GPU used when split mode is "none"
#   --host 0.0.0.0           listen address
#   --jinja                  use the Jinja chat template
# NOTE(review): 0.0.0.0 presumably makes the server reachable from other
# machines on the network; confirm that is intended.
CUDA_VISIBLE_DEVICES=0 llama-server \
  --hf-repo llmfan46/Qwen3.5-27B-heretic-v3-GGUF \
  --hf-file Qwen3.5-27B-heretic-v3-Q4_K_M.gguf \
  --n-gpu-layers 99 \
  --ctx-size 65536 \
  --flash-attn on \
  --cache-type-k q4_0 \
  --cache-type-v q4_0 \
  --split-mode none \
  --main-gpu 0 \
  --host 0.0.0.0 \
  --jinja