YAML Metadata Warning: empty or missing YAML metadata in repo card

Check out the documentation for more information.

https://github.com/KLL535/ComfyUI_Simple_Qwen3-VL-gguf

git clone https://github.com/JamePeng/llama-cpp-python
cd llama-cpp-python
git clone https://github.com/ggml-org/llama.cpp ./vendor/llama.cpp
$env:CMAKE_ARGS = "-DGGML_CUDA=on"
D:\ComfyUI\venv\Scripts\python -m pip install -e . --verbose

After compilation is finished, the llama-cpp-python folder must not be deleted, because the editable install (`pip install -e .`) continues to reference it.


There is no need to compile it manually anymore. You can use JamePeng’s precompiled wheel (.whl) package, as long as the CUDA version matches exactly.

https://github.com/1038lab/ComfyUI-QwenVL

https://github.com/JamePeng/llama-cpp-python

https://developer.nvidia.com/cuda-toolkit-archive


Gemma 4 requires llama-cpp-python ≥ 0.3.35

ComfyUI\custom_nodes\ComfyUI_Simple_Qwen3-VL-gguf\system_prompts_user.json

{
    "_system_prompts": {
    },
    "_user_prompt_styles": {
    },
    "_camera_preset": {
    },
    "_model_presets": {
        "gemma-4-E4B-it-IQ4_XS": {
            "model_path": "D:\\ComfyUI\\models\\LLM\\gemma-4-E4B-it-IQ4_XS.gguf",
            "mmproj_path": "D:\\ComfyUI\\models\\LLM\\gemma-4-E4B-it-mmproj-BF16.gguf",
            "output_max_tokens": 2048,
            "ctx": 8192,
            "n_batch": 2048,
            "n_ubatch": 2048,
            "gpu_layers": -1,
            "temperature": 1.0,
            "top_p": 0.95,
            "min_p": 0.01,
            "top_k": 64,
            "repeat_penalty": 1.0,
            "chat_handler": "gemma4",
            "script": "qwen3vl_run.py",
            "silent": false,
            "debug": true,
            "verbose": true,
            "raw_mode": true,
            "prompt_template": "<|turn>system\n{system}<turn|>\n<|turn>user\n{images}\n{user}<turn|>\n<|turn>model\n",
            "stop": ["<turn|>", "<eos>", "<|end_of_turn|>"]
        },
        "Huihui-Qwen3.5-9B-abliterated.i1-IQ4_XS": {
            "model_path": "D:\\ComfyUI\\models\\LLM\\Huihui-Qwen3.5-9B-abliterated.i1-IQ4_XS.gguf",
            "mmproj_path": "D:\\ComfyUI\\models\\LLM\\Qwen3.5-9B-mmproj-BF16.gguf",
            "output_max_tokens": 2048,
            "image_min_tokens": 1024,
            "image_max_tokens": 2048,
            "ctx": 8192,
            "n_batch": 2048,
            "n_ubatch": 512,
            "gpu_layers": -1,
            "temperature": 0.7,
            "top_p": 0.8,
            "min_p": 0.05,
            "top_k": 20,
            "repeat_penalty": 1.0,
            "present_penalty": 1.5,
            "pool_size": 4194304,
            "chat_handler": "qwen35",
            "enable_thinking": false,
            "script": "qwen3vl_run.py",
            "silent": false,
            "debug": true
        },
        "Qwen3-VL-8B-Instruct-IQ4_XS": {
            "model_path": "D:\\ComfyUI\\models\\LLM\\Qwen3-VL-8B-Instruct-IQ4_XS.gguf",
            "mmproj_path": "D:\\ComfyUI\\models\\LLM\\Qwen3-VL-8B-Instruct-mmproj-BF16.gguf",
            "output_max_tokens": 2048,
            "image_min_tokens": 1024,
            "image_max_tokens": 2048,
            "ctx": 8192,
            "n_batch": 2048,
            "n_ubatch": 512,
            "gpu_layers": -1,
            "temperature": 0.7,
            "top_p": 0.92,
            "min_p": 0.01,
            "top_k": 40,
            "repeat_penalty": 1.1,
            "pool_size": 4194304,
            "chat_handler": "qwen3",
            "script": "qwen3vl_run.py",
            "silent": false,
            "debug": true
        }
    }
}
Downloads last month
6,586
GGUF
Model size
8B params
Architecture
llama
Hardware compatibility
Log In to add your hardware

4-bit

Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support