YAML Metadata Warning: empty or missing YAML metadata in repo card

Check out the documentation for more information.

https://github.com/KLL535/ComfyUI_Simple_Qwen3-VL-gguf

git clone https://github.com/JamePeng/llama-cpp-python
cd llama-cpp-python
git clone https://github.com/ggml-org/llama.cpp ./vendor/llama.cpp
$env:CMAKE_ARGS = "-DGGML_CUDA=on"
D:\ComfyUI\venv\Scripts\python -m pip install -e . --verbose

After compilation is finished, the llama-cpp-python folder must not be deleted, because the editable install (`pip install -e .`) continues to reference it.


There is no need to compile it manually anymore. You can use JamePeng’s precompiled wheel (.whl) package, as long as the CUDA version matches exactly.

https://github.com/1038lab/ComfyUI-QwenVL

https://github.com/JamePeng/llama-cpp-python

https://developer.nvidia.com/cuda-toolkit-archive


Gemma 4 requires llama-cpp-python ≥ 0.3.35

ComfyUI\custom_nodes\ComfyUI_Simple_Qwen3-VL-gguf\system_prompts_user.json

{
    "_system_prompts": {
    },
    "_user_prompt_styles": {
    },
    "_camera_preset": {
    },
    "_model_presets": {
        "gemma-4-E4B-it-IQ4_XS": {
            "model_path": "D:\\ComfyUI\\models\\LLM\\gemma-4-E4B-it-IQ4_XS.gguf",
            "mmproj_path": "D:\\ComfyUI\\models\\LLM\\gemma-4-E4B-it-mmproj-BF16.gguf",
            "output_max_tokens": 2048,
            "ctx": 8192,
            "n_batch": 2048,
            "n_ubatch": 2048,
            "gpu_layers": -1,
            "temperature": 1.0,
            "top_p": 0.95,
            "min_p": 0.01,
            "top_k": 64,
            "repeat_penalty": 1.0,
            "chat_handler": "gemma4",
            "script": "qwen3vl_run.py",
            "silent": false,
            "debug": true,
            "verbose": true,
            "raw_mode": true,
            "prompt_template": "<|turn>system\n{system}<turn|>\n<|turn>user\n{images}\n{user}<turn|>\n<|turn>model\n",
            "stop": ["<turn|>", "<eos>", "<|end_of_turn|>"]
        },
        "Huihui-Qwen3.5-9B-abliterated.i1-IQ4_XS": {
            "model_path": "D:\\ComfyUI\\models\\LLM\\Huihui-Qwen3.5-9B-abliterated.i1-IQ4_XS.gguf",
            "mmproj_path": "D:\\ComfyUI\\models\\LLM\\Qwen3.5-9B-mmproj-BF16.gguf",
            "output_max_tokens": 2048,
            "image_min_tokens": 1024,
            "image_max_tokens": 2048,
            "ctx": 8192,
            "n_batch": 2048,
            "n_ubatch": 512,
            "gpu_layers": -1,
            "temperature": 0.7,
            "top_p": 0.8,
            "min_p": 0.05,
            "top_k": 20,
            "repeat_penalty": 1.0,
            "present_penalty": 1.5,
            "pool_size": 4194304,
            "chat_handler": "qwen35",
            "enable_thinking": false,
            "script": "qwen3vl_run.py",
            "silent": false,
            "debug": true
        },
        "Qwen3-VL-8B-Instruct-IQ4_XS": {
            "model_path": "D:\\ComfyUI\\models\\LLM\\Qwen3-VL-8B-Instruct-IQ4_XS.gguf",
            "mmproj_path": "D:\\ComfyUI\\models\\LLM\\Qwen3-VL-8B-Instruct-mmproj-BF16.gguf",
            "output_max_tokens": 2048,
            "image_min_tokens": 1024,
            "image_max_tokens": 2048,
            "ctx": 8192,
            "n_batch": 2048,
            "n_ubatch": 512,
            "gpu_layers": -1,
            "temperature": 0.7,
            "top_p": 0.92,
            "min_p": 0.01,
            "top_k": 40,
            "repeat_penalty": 1.1,
            "pool_size": 4194304,
            "chat_handler": "qwen3",
            "script": "qwen3vl_run.py",
            "silent": false,
            "debug": true
        }
    }
}
Downloads last month
6,586
GGUF
Model size
8B params
Architecture
llama
Hardware compatibility
Log In to add your hardware

4-bit

Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support