Add configuration_flash_head_qwen3_vl.py for FlashHead support
Browse files
configuration_flash_head_qwen3_vl.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Import-time guard for Embedl FlashHead support on Qwen3-VL.

Importing this module probes the running vLLM installation for the
FlashHead capability flag. If the flag is absent (plain upstream vLLM,
or no vLLM at all), the import fails fast with actionable setup
instructions instead of a cryptic downstream error.
"""

try:
    # Capability probe: this private flag exists only in the Embedl vLLM
    # fork shipped inside the Docker images below. The imported name is
    # deliberately unused — a successful import is the whole check.
    from vllm.model_executor.models.qwen3_vl import _SUPPORTS_EMBEDL_FLASHHEAD
except ImportError:
    # `from None` suppresses implicit exception chaining (PEP 409) so the
    # user sees only this formatted message, not the original ImportError
    # traceback followed by "During handling of the above exception...".
    raise ImportError(
        "\n\n"
        "===============================================================\n"
        " FlashHead requires the Embedl Docker container to run.\n"
        "\n"
        " Currently, FlashHead inference is supported only via vLLM.\n"
        " Hugging Face Transformers support will be added in a future release.\n"
        " Please use one of the following images:\n"
        "\n"
        " Jetson Orin: embedl/vllm:latest-jetson-orin-flashhead\n"
        " Jetson Thor: embedl/vllm:latest-jetson-thor-flashhead\n"
        "\n"
        " Do NOT install embedl-models via pip directly.\n"
        "===============================================================\n"
    ) from None