YAML Metadata Warning: empty or missing YAML metadata in repo card
Check out the documentation for more information.
https://github.com/KLL535/ComfyUI_Simple_Qwen3-VL-gguf
git clone https://github.com/JamePeng/llama-cpp-python
cd llama-cpp-python
git clone https://github.com/ggml-org/llama.cpp ./vendor/llama.cpp
$env:CMAKE_ARGS = "-DGGML_CUDA=on"
D:\ComfyUI\venv\Scripts\python -m pip install -e . --verbose
After compilation is finished, the llama-cpp-python folder must not be deleted.
There is no need to compile it manually anymore. You can use JamePeng’s precompiled WHL package, as long as the CUDA version matches exactly.
https://github.com/1038lab/ComfyUI-QwenVL
https://github.com/JamePeng/llama-cpp-python
https://developer.nvidia.com/cuda-toolkit-archive
Gemma 4 requires llama-cpp-python ≥ 0.3.35
ComfyUI\custom_nodes\ComfyUI_Simple_Qwen3-VL-gguf\system_prompts_user.json
{
  "_system_prompts": {},
  "_user_prompt_styles": {},
  "_camera_preset": {},
  "_model_presets": {
    "gemma-4-E4B-it-IQ4_XS": {
      "model_path": "D:\\ComfyUI\\models\\LLM\\gemma-4-E4B-it-IQ4_XS.gguf",
      "mmproj_path": "D:\\ComfyUI\\models\\LLM\\gemma-4-E4B-it-mmproj-BF16.gguf",
      "output_max_tokens": 2048,
      "ctx": 8192,
      "n_batch": 2048,
      "n_ubatch": 2048,
      "gpu_layers": -1,
      "temperature": 1.0,
      "top_p": 0.95,
      "min_p": 0.01,
      "top_k": 64,
      "repeat_penalty": 1.0,
      "chat_handler": "gemma4",
      "script": "qwen3vl_run.py",
      "silent": false,
      "debug": true,
      "verbose": true,
      "raw_mode": true,
      "prompt_template": "<|turn>system\n{system}<turn|>\n<|turn>user\n{images}\n{user}<turn|>\n<|turn>model\n",
      "stop": ["<turn|>", "<eos>", "<|end_of_turn|>"]
    },
    "Huihui-Qwen3.5-9B-abliterated.i1-IQ4_XS": {
      "model_path": "D:\\ComfyUI\\models\\LLM\\Huihui-Qwen3.5-9B-abliterated.i1-IQ4_XS.gguf",
      "mmproj_path": "D:\\ComfyUI\\models\\LLM\\Qwen3.5-9B-mmproj-BF16.gguf",
      "output_max_tokens": 2048,
      "image_min_tokens": 1024,
      "image_max_tokens": 2048,
      "ctx": 8192,
      "n_batch": 2048,
      "n_ubatch": 512,
      "gpu_layers": -1,
      "temperature": 0.7,
      "top_p": 0.8,
      "min_p": 0.05,
      "top_k": 20,
      "repeat_penalty": 1.0,
      "present_penalty": 1.5,
      "pool_size": 4194304,
      "chat_handler": "qwen35",
      "enable_thinking": false,
      "script": "qwen3vl_run.py",
      "silent": false,
      "debug": true
    },
    "Qwen3-VL-8B-Instruct-IQ4_XS": {
      "model_path": "D:\\ComfyUI\\models\\LLM\\Qwen3-VL-8B-Instruct-IQ4_XS.gguf",
      "mmproj_path": "D:\\ComfyUI\\models\\LLM\\Qwen3-VL-8B-Instruct-mmproj-BF16.gguf",
      "output_max_tokens": 2048,
      "image_min_tokens": 1024,
      "image_max_tokens": 2048,
      "ctx": 8192,
      "n_batch": 2048,
      "n_ubatch": 512,
      "gpu_layers": -1,
      "temperature": 0.7,
      "top_p": 0.92,
      "min_p": 0.01,
      "top_k": 40,
      "repeat_penalty": 1.1,
      "pool_size": 4194304,
      "chat_handler": "qwen3",
      "script": "qwen3vl_run.py",
      "silent": false,
      "debug": true
    }
  }
}
- Downloads last month
- 6,586
Hardware compatibility
Log In to add your hardware
4-bit
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support