NotImplementedError in gptqmodel
I loaded the model with 'transformers.AutoModelForImageTextToText'.
The versions of the main packages are listed below:
GPTQModel : 5.8.0
Transformers : 5.2.0
Torch : 2.8.0+cu128
Triton : 2.0.0
gptqmodel raises a NotImplementedError:
"
File "/mnt/data/linxin_4090_7/code/20260318_qwen_test/test/test_qwen3_5_27B_Int4_message.py", line 23, in
model = AutoModelForImageTextToText.from_pretrained(
File "/mnt/data/miniconda3/envs/qwen35_gptq_py310/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 374, in from_pretrained
return model_class.from_pretrained(
File "/mnt/data/miniconda3/envs/qwen35_gptq_py310/lib/python3.10/site-packages/transformers/modeling_utils.py", line 4033, in from_pretrained
hf_quantizer.preprocess_model(
File "/mnt/data/miniconda3/envs/qwen35_gptq_py310/lib/python3.10/site-packages/transformers/quantizers/base.py", line 167, in preprocess_model
self._process_model_before_weight_loading(model, **kwargs)
File "/mnt/data/miniconda3/envs/qwen35_gptq_py310/lib/python3.10/site-packages/gptqmodel/init.py", line 48, in _process_model_before_weight_loading_with_device_map
return original_process(self, model, **kwargs)
File "/mnt/data/miniconda3/envs/qwen35_gptq_py310/lib/python3.10/site-packages/transformers/quantizers/quantizer_gptq.py", line 87, in _process_model_before_weight_loading
model = self.optimum_quantizer.convert_model(model, **kwargs)
File "/mnt/data/miniconda3/envs/qwen35_gptq_py310/lib/python3.10/site-packages/optimum/gptq/quantizer.py", line 277, in convert_model
self._replace_by_quant_layers(model, layers_to_be_replaced)
File "/mnt/data/miniconda3/envs/qwen35_gptq_py310/lib/python3.10/site-packages/optimum/gptq/quantizer.py", line 357, in _replace_by_quant_layers
self._replace_by_quant_layers(child, names, name + "." + name1 if name != "" else name1)
File "/mnt/data/miniconda3/envs/qwen35_gptq_py310/lib/python3.10/site-packages/optimum/gptq/quantizer.py", line 357, in _replace_by_quant_layers
self._replace_by_quant_layers(child, names, name + "." + name1 if name != "" else name1)
File "/mnt/data/miniconda3/envs/qwen35_gptq_py310/lib/python3.10/site-packages/optimum/gptq/quantizer.py", line 357, in _replace_by_quant_layers
self._replace_by_quant_layers(child, names, name + "." + name1 if name != "" else name1)
[Previous line repeated 2 more times]
File "/mnt/data/miniconda3/envs/qwen35_gptq_py310/lib/python3.10/site-packages/optimum/gptq/quantizer.py", line 324, in _replace_by_quant_layers
new_layer = self.quant_linear(
File "/mnt/data/miniconda3/envs/qwen35_gptq_py310/lib/python3.10/site-packages/gptqmodel/nn_modules/qlinear/tritonv2.py", line 72, in init
super().init(
File "/mnt/data/miniconda3/envs/qwen35_gptq_py310/lib/python3.10/site-packages/gptqmodel/nn_modules/qlinear/torch.py", line 140, in init
super().init(
File "/mnt/data/miniconda3/envs/qwen35_gptq_py310/lib/python3.10/site-packages/gptqmodel/nn_modules/qlinear/init.py", line 449, in init
super().init(*args, **kwargs)
File "/mnt/data/miniconda3/envs/qwen35_gptq_py310/lib/python3.10/site-packages/gptqmodel/nn_modules/qlinear/init.py", line 116, in init
raise err
NotImplementedError: <class 'gptqmodel.nn_modules.qlinear.tritonv2.TritonV2QuantLinear'>: out_features: 48 must be divisible by [32].
terminate called without an active exception
Aborted
"