Taykhoom committed on
Commit
adac0f5
·
verified ·
1 Parent(s): a8c6b7e

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. README.md +1 -0
  2. modeling_helix_mrna.py +24 -4
README.md CHANGED
@@ -108,6 +108,7 @@ This repository contains modified versions of Helical code.
108
  Modifications include:
109
  - Removal of reliance on helical package
110
  - Removal of some ease-of-use embedding generation code (to standardize usage) and other checks (see original repository for more details)
 
111
 
112
  Not all of the original functionality may be preserved. These changes were made to better integrate with the mRNABench framework which focuses on embedding generation for mRNA sequences. Most of the required code was directly copied from the original Helical repository with minimal changes, so please refer to the original repository for full details on the implementation.
113
 
 
108
  Modifications include:
109
  - Removal of reliance on helical package
110
  - Removal of some ease-of-use embedding generation code (to standardize usage) and other checks (see original repository for more details)
111
+ - Standardized return of attention maps and output embeddings to be in line with Hugging Face convention (i.e. None returned for all Mamba blocks attention weights, and input_embeddings returned when output_hidden_states is True)
112
 
113
  Not all of the original functionality may be preserved. These changes were made to better integrate with the mRNABench framework which focuses on embedding generation for mRNA sequences. Most of the required code was directly copied from the original Helical repository with minimal changes, so please refer to the original repository for full details on the implementation.
114
 
modeling_helix_mrna.py CHANGED
@@ -1431,6 +1431,8 @@ class HelixmRNAOutput(ModelOutput):
1431
  avoid providing the old `input_ids`.
1432
 
1433
  Includes both the State space model state matrices after the selective scan, and the Convolutional states
 
 
1434
  hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
1435
  Tuple of `torch.FloatTensor` (one for the output of the embeddings, if the model has an embedding layer, +
1436
  one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.
@@ -1440,6 +1442,7 @@ class HelixmRNAOutput(ModelOutput):
1440
 
1441
  last_hidden_state: Optional[torch.FloatTensor] = None
1442
  cache_params: Optional[Mamba2Cache] = None
 
1443
  hidden_states: Optional[Tuple[torch.FloatTensor]] = None
1444
 
1445
 
@@ -1579,6 +1582,13 @@ class HelixmRNAModel(HelixmRNAPreTrainedModel):
1579
 
1580
  all_hidden_states = () if output_hidden_states else None
1581
  all_self_attns = () if output_attentions else None
 
 
 
 
 
 
 
1582
  for helix_block in self.layers:
1583
 
1584
  layer_mask = (
@@ -1610,15 +1620,22 @@ class HelixmRNAModel(HelixmRNAPreTrainedModel):
1610
  if output_hidden_states:
1611
  all_hidden_states += (layer_outputs[0],)
1612
 
 
 
 
 
 
1613
  hidden_states = self.norm_f(layer_outputs[0])
1614
 
1615
  if output_hidden_states:
1616
  all_hidden_states = all_hidden_states + (hidden_states,)
1617
 
1618
- if output_attentions:
1619
- if layer_outputs[1] is not None:
1620
- # append attentions only of attention layers. Mamba layers return `None` as the attention weights
1621
- all_self_attns += (layer_outputs[1],)
 
 
1622
 
1623
  if use_cache:
1624
  cache_params.seqlen_offset += inputs_embeds.shape[1]
@@ -1630,11 +1647,14 @@ class HelixmRNAModel(HelixmRNAPreTrainedModel):
1630
  if v is not None
1631
  )
1632
 
 
1633
  return HelixmRNAOutput(
1634
  last_hidden_state=hidden_states,
1635
  cache_params=cache_params if use_cache else None,
 
1636
  hidden_states=all_hidden_states,
1637
  )
 
1638
 
1639
  def _update_causal_mask(self, attention_mask, input_tensor, cache_position):
1640
  if self.config._attn_implementation == "flash_attention_2":
 
1431
  avoid providing the old `input_ids`.
1432
 
1433
  Includes both the State space model state matrices after the selective scan, and the Convolutional states
1434
+ attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
1435
+ Attention weights of all attention layers. Each entry is a tensor of shape `(batch_size, num_heads, sequence_length, sequence_length)`.
1436
  hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
1437
  Tuple of `torch.FloatTensor` (one for the output of the embeddings, if the model has an embedding layer, +
1438
  one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.
 
1442
 
1443
  last_hidden_state: Optional[torch.FloatTensor] = None
1444
  cache_params: Optional[Mamba2Cache] = None
1445
+ attentions: Optional[Tuple[torch.FloatTensor]] = None
1446
  hidden_states: Optional[Tuple[torch.FloatTensor]] = None
1447
 
1448
 
 
1582
 
1583
  all_hidden_states = () if output_hidden_states else None
1584
  all_self_attns = () if output_attentions else None
1585
+
1586
+ ####### CHANGE TO BE IN LINE WITH HF CONVENTION #######
1587
+ if output_hidden_states:
1588
+ all_hidden_states += (inputs_embeds,) # index 0 = embedding, matching HF convention
1589
+
1590
+ ####### END OF CHANGE #######
1591
+
1592
  for helix_block in self.layers:
1593
 
1594
  layer_mask = (
 
1620
  if output_hidden_states:
1621
  all_hidden_states += (layer_outputs[0],)
1622
 
1623
+ ####### CHANGE TO BE IN LINE WITH HF CONVENTION #######
1624
+ if output_attentions:
1625
+ all_self_attns += (layer_outputs[1] if len(layer_outputs) > 1 else None,)
1626
+ ####### END OF CHANGE #######
1627
+
1628
  hidden_states = self.norm_f(layer_outputs[0])
1629
 
1630
  if output_hidden_states:
1631
  all_hidden_states = all_hidden_states + (hidden_states,)
1632
 
1633
+ ####### CHANGE TO BE IN LINE WITH HF CONVENTION #######
1634
+ # if output_attentions:
1635
+ # if layer_outputs[1] is not None:
1636
+ # # append attentions only of attention layers. Mamba layers return `None` as the attention weights
1637
+ # all_self_attns += (layer_outputs[1],)
1638
+ ####### END OF CHANGE #######
1639
 
1640
  if use_cache:
1641
  cache_params.seqlen_offset += inputs_embeds.shape[1]
 
1647
  if v is not None
1648
  )
1649
 
1650
+ ####### CHANGE TO BE IN LINE WITH HF CONVENTION #######
1651
  return HelixmRNAOutput(
1652
  last_hidden_state=hidden_states,
1653
  cache_params=cache_params if use_cache else None,
1654
+ attentions=all_self_attns,
1655
  hidden_states=all_hidden_states,
1656
  )
1657
+ ####### END OF CHANGE #######
1658
 
1659
  def _update_causal_mask(self, attention_mask, input_tensor, cache_position):
1660
  if self.config._attn_implementation == "flash_attention_2":