KeyError: None when the model.generate() method is executed in mPLUG-Owl3

#8
by dutta18 - opened

Below is the whole stack trace; I am using transformers 4.57.3.

KeyError Traceback (most recent call last)
Cell In[15], line 1
----> 1 get_ipython().run_cell_magic('time', '', 'g = model.generate(**inputs)\nprint(g)\n')

File ~/miniconda3/envs/vid_env/lib/python3.12/site-packages/IPython/core/interactiveshell.py:2565, in InteractiveShell.run_cell_magic(self, magic_name, line, cell)
2563 with self.builtin_trap:
2564 args = (magic_arg_s, cell)
-> 2565 result = fn(*args, **kwargs)
2567 # The code below prevents the output from being displayed
2568 # when using magics with decorator @output_can_be_silenced
2569 # when the last Python token in the expression is a ';'.
2570 if getattr(fn, magic.MAGIC_OUTPUT_CAN_BE_SILENCED, False):

File ~/miniconda3/envs/vid_env/lib/python3.12/site-packages/IPython/core/magics/execution.py:1452, in ExecutionMagics.time(self, line, cell, local_ns)
1450 if interrupt_occured:
1451 if exit_on_interrupt and captured_exception:
-> 1452 raise captured_exception
1453 return
1454 return out

File ~/miniconda3/envs/vid_env/lib/python3.12/site-packages/IPython/core/magics/execution.py:1416, in ExecutionMagics.time(self, line, cell, local_ns)
1414 st = clock2()
1415 try:
-> 1416 exec(code, glob, local_ns)
1417 out = None
1418 # multi-line %%time case

File :1

File ~/.cache/huggingface/modules/transformers_modules/mPLUG/mPLUG_hyphen_Owl3_hyphen_7B_hyphen_240728/eff25bcdc02ff1b513c25f376d761ec1ab6dfa1b/modeling_mplugowl3.py:152, in mPLUGOwl3Model.generate(self, input_ids, pixel_values, media_offset, attention_mask, tokenizer, stream, decode_text, **kwargs)
149 assert input_ids is not None
151 with torch.inference_mode():
--> 152 image_embeds = self.forward_image(pixel_values)
154 if stream:
155 result = self._decode_stream(input_ids=input_ids, image_embeds=image_embeds, media_offset=media_offset, tokenizer=tokenizer, **kwargs)

File ~/.cache/huggingface/modules/transformers_modules/mPLUG/mPLUG_hyphen_Owl3_hyphen_7B_hyphen_240728/eff25bcdc02ff1b513c25f376d761ec1ab6dfa1b/modeling_mplugowl3.py:70, in mPLUGOwl3Model.forward_image(self, pixel_values)
68 dtype = self.language_model.model.embed_tokens.weight.dtype
69 with torch.inference_mode():
---> 70 image_embeds = self.vision_model(pixel_values.to(dtype), output_hidden_states=True).hidden_states[-2]
72 if self.vision2text_model is not None:
73 image_embeds = self.vision2text_model(image_embeds)

File ~/miniconda3/envs/vid_env/lib/python3.12/site-packages/torch/nn/modules/module.py:1775, in Module._wrapped_call_impl(self, *args, **kwargs)
1773 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1774 else:
-> 1775 return self._call_impl(*args, **kwargs)

File ~/miniconda3/envs/vid_env/lib/python3.12/site-packages/torch/nn/modules/module.py:1786, in Module._call_impl(self, *args, **kwargs)
1781 # If we don't have any hooks, we want to skip the rest of the logic in
1782 # this function, and just call forward.
1783 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1784 or _global_backward_pre_hooks or _global_backward_hooks
1785 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1786 return forward_call(*args, **kwargs)
1788 result = None
1789 called_always_called_hooks = set()

File ~/miniconda3/envs/vid_env/lib/python3.12/site-packages/transformers/models/siglip/modeling_siglip.py:700, in SiglipVisionTransformer.forward(self, pixel_values, interpolate_pos_encoding, **kwargs)
691 @auto_docstring
692 def forward(
693 self,
(...) 696 **kwargs: Unpack[TransformersKwargs],
697 ) -> BaseModelOutputWithPooling:
698 hidden_states = self.embeddings(pixel_values, interpolate_pos_encoding=interpolate_pos_encoding)
--> 700 encoder_outputs: BaseModelOutput = self.encoder(
701 inputs_embeds=hidden_states,
702 **kwargs,
703 )
705 last_hidden_state = encoder_outputs.last_hidden_state
706 last_hidden_state = self.post_layernorm(last_hidden_state)

File ~/miniconda3/envs/vid_env/lib/python3.12/site-packages/torch/nn/modules/module.py:1775, in Module._wrapped_call_impl(self, *args, **kwargs)
1773 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1774 else:
-> 1775 return self._call_impl(*args, **kwargs)

File ~/miniconda3/envs/vid_env/lib/python3.12/site-packages/torch/nn/modules/module.py:1786, in Module._call_impl(self, *args, **kwargs)
1781 # If we don't have any hooks, we want to skip the rest of the logic in
1782 # this function, and just call forward.
1783 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1784 or _global_backward_pre_hooks or _global_backward_hooks
1785 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1786 return forward_call(*args, **kwargs)
1788 result = None
1789 called_always_called_hooks = set()

File ~/miniconda3/envs/vid_env/lib/python3.12/site-packages/transformers/models/siglip/modeling_siglip.py:559, in SiglipEncoder.forward(self, inputs_embeds, attention_mask, **kwargs)
557 hidden_states = inputs_embeds
558 for encoder_layer in self.layers:
--> 559 hidden_states = encoder_layer(
560 hidden_states,
561 attention_mask,
562 **kwargs,
563 )
565 return BaseModelOutput(last_hidden_state=hidden_states)

File ~/miniconda3/envs/vid_env/lib/python3.12/site-packages/transformers/modeling_layers.py:94, in GradientCheckpointingLayer.call(self, *args, **kwargs)
91 logger.warning_once(message)
93 return self._gradient_checkpointing_func(partial(super().call, **kwargs), *args)
---> 94 return super().call(*args, **kwargs)

File ~/miniconda3/envs/vid_env/lib/python3.12/site-packages/torch/nn/modules/module.py:1775, in Module._wrapped_call_impl(self, *args, **kwargs)
1773 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1774 else:
-> 1775 return self._call_impl(*args, **kwargs)

File ~/miniconda3/envs/vid_env/lib/python3.12/site-packages/torch/nn/modules/module.py:1786, in Module._call_impl(self, *args, **kwargs)
1781 # If we don't have any hooks, we want to skip the rest of the logic in
1782 # this function, and just call forward.
1783 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1784 or _global_backward_pre_hooks or _global_backward_hooks
1785 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1786 return forward_call(*args, **kwargs)
1788 result = None
1789 called_always_called_hooks = set()

File ~/miniconda3/envs/vid_env/lib/python3.12/site-packages/transformers/models/siglip/modeling_siglip.py:449, in SiglipEncoderLayer.forward(self, hidden_states, attention_mask, **kwargs)
446 residual = hidden_states
448 hidden_states = self.layer_norm1(hidden_states)
--> 449 hidden_states, _ = self.self_attn(
450 hidden_states=hidden_states,
451 attention_mask=attention_mask,
452 **kwargs,
453 )
454 hidden_states = residual + hidden_states
456 residual = hidden_states

File ~/miniconda3/envs/vid_env/lib/python3.12/site-packages/torch/nn/modules/module.py:1775, in Module._wrapped_call_impl(self, *args, **kwargs)
1773 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1774 else:
-> 1775 return self._call_impl(*args, **kwargs)

File ~/miniconda3/envs/vid_env/lib/python3.12/site-packages/torch/nn/modules/module.py:1786, in Module._call_impl(self, *args, **kwargs)
1781 # If we don't have any hooks, we want to skip the rest of the logic in
1782 # this function, and just call forward.
1783 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1784 or _global_backward_pre_hooks or _global_backward_hooks
1785 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1786 return forward_call(*args, **kwargs)
1788 result = None
1789 called_always_called_hooks = set()

File ~/miniconda3/envs/vid_env/lib/python3.12/site-packages/transformers/models/siglip/modeling_siglip.py:395, in SiglipAttention.forward(self, hidden_states, attention_mask, **kwargs)
393 attention_interface: Callable = eager_attention_forward
394 if self.config._attn_implementation != "eager":
--> 395 attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation]
397 attn_output, attn_weights = attention_interface(
398 self,
399 queries,
(...) 405 dropout=0.0 if not self.training else self.dropout,
406 )
408 attn_output = attn_output.reshape(batch_size, seq_length, embed_dim).contiguous()

File ~/miniconda3/envs/vid_env/lib/python3.12/site-packages/transformers/utils/generic.py:1138, in GeneralInterface.getitem(self, key)
1136 if key in self._local_mapping:
1137 return self._local_mapping[key]
-> 1138 return self._global_mapping[key]

KeyError: None

Sign up or log in to comment