AttributeError: 'Qwen2_5_VLVisionAttention' object has no attribute 'is_causal'
AttributeError: 'Qwen2_5_VLVisionAttention' object has no attribute 'is_causal'
AttributeError: 'Qwen2_5_VLVisionAttention' object has no attribute 'is_causal'
Can you share the code that raises this error and the version of your transformers lib?
# Load processor and model.
# NOTE(review): attn_implementation="flash_attention_2" is what triggers the
# 'Qwen2_5_VLVisionAttention has no attribute is_causal' error on
# transformers==4.53.0 — pin transformers<=4.52.1 (see thread below).
processor = AutoProcessor.from_pretrained(model_name)
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
    device_map="auto",
)
# With device_map="auto" the model may be sharded; use the device of the
# first parameter as the target for the inputs.
device = next(model.parameters()).device

# Load input data — assumes a JSON list of objects with "img_path" and
# "prompt" keys (TODO confirm against the producing script).
with open(input_json_path, 'r') as f:
    data = json.load(f)

results = []
for entry in data:
    img_path = entry.get("img_path")
    prompt_text = entry.get("prompt")
    # Prepare inference messages in the Qwen-VL chat format: one user turn
    # containing the image followed by the text prompt.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": img_path},
                {"type": "text", "text": prompt_text}
            ],
        }
    ]
    # Apply chat template (text only; tokenization happens in the processor).
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    # Extract image/video tensors referenced by the messages.
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )
    inputs = inputs.to(device)
    # Run generation.
    generated_ids = model.generate(**inputs, max_new_tokens=128)
    # Drop the prompt tokens so only the newly generated tokens are decoded.
    generated_sequences = [
        out_ids[len(in_ids):]
        for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    output_texts = processor.batch_decode(
        generated_sequences,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    # Attach the decoded response to a copy of the input entry so the
    # original data is not mutated.
    response = output_texts[0].strip()
    result_entry = entry.copy()
    result_entry["model_response"] = response
    results.append(result_entry)
python -c "import transformers; print(transformers.__version__)"
4.53.0
# Load processor and model
processor = AutoProcessor.from_pretrained(model_name)
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
    device_map="auto",
)
device = next(model.parameters()).device
# Load input data
with open(input_json_path, 'r') as f:
    data = json.load(f)
results = []
for entry in data:
    img_path = entry.get("img_path")
    prompt_text = entry.get("prompt")
    # Prepare inference messages
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": img_path},
                {"type": "text", "text": prompt_text}
            ],
        }
    ]
    # Apply chat template
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    # Process vision inputs
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )
    inputs = inputs.to(device)
    # Run generation
    generated_ids = model.generate(**inputs, max_new_tokens=128)
    generated_sequences = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
    output_texts = processor.batch_decode(
        generated_sequences,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    # Attach response
    response = output_texts[0].strip()
    result_entry = entry.copy()
    result_entry["model_response"] = response
    results.append(result_entry)

python -c "import transformers; print(transformers.__version__)"
4.53.0
We tried this transformers version and got the same error, so the cause is that the latest transformers release is not compatible with our model. You may need to use transformers<=4.52.1.
Thanks
# Load processor and model
processor = AutoProcessor.from_pretrained(model_name)
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
    device_map="auto",
)
device = next(model.parameters()).device
# Load input data
with open(input_json_path, 'r') as f:
    data = json.load(f)
results = []
for entry in data:
    img_path = entry.get("img_path")
    prompt_text = entry.get("prompt")
    # Prepare inference messages
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": img_path},
                {"type": "text", "text": prompt_text}
            ],
        }
    ]
    # Apply chat template
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    # Process vision inputs
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )
    inputs = inputs.to(device)
    # Run generation
    generated_ids = model.generate(**inputs, max_new_tokens=128)
    generated_sequences = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
    output_texts = processor.batch_decode(
        generated_sequences,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    # Attach response
    response = output_texts[0].strip()
    result_entry = entry.copy()
    result_entry["model_response"] = response
    results.append(result_entry)

python -c "import transformers; print(transformers.__version__)"
4.53.0
https://github.com/huggingface/transformers/issues/39095
It appears to be a transformers bug, which is also encountered with the original qwen2_5_vl models.