Sailesh Panda committed on
Commit ·
7c50ab0
1
Parent(s): 85af162
Experiments
Browse files- config.json +2 -0
- configuration_hinvec.py +1 -1
- modeling_hinvec.py +2 -3
config.json
CHANGED
|
@@ -33,6 +33,8 @@
|
|
| 33 |
"use_cache": true,
|
| 34 |
"use_sliding_window": true,
|
| 35 |
"vocab_size": 256002,
|
|
|
|
|
|
|
| 36 |
"auto_map": {
|
| 37 |
"AutoConfig": "Sailesh97/Hinvec2--configuration_hinvec.HinvecConfig",
|
| 38 |
"AutoModel": "Sailesh97/Hinvec2--modeling_hinvec.HinvecModel"}
|
|
|
|
| 33 |
"use_cache": true,
|
| 34 |
"use_sliding_window": true,
|
| 35 |
"vocab_size": 256002,
|
| 36 |
+
"pad_token_id": 3,
|
| 37 |
+
"bos_token_id": 256000,
|
| 38 |
"auto_map": {
|
| 39 |
"AutoConfig": "Sailesh97/Hinvec2--configuration_hinvec.HinvecConfig",
|
| 40 |
"AutoModel": "Sailesh97/Hinvec2--modeling_hinvec.HinvecModel"}
|
configuration_hinvec.py
CHANGED
|
@@ -151,7 +151,7 @@ class HinvecConfig(PretrainedConfig):
|
|
| 151 |
rope_theta=10000.0,
|
| 152 |
rope_scaling=None,
|
| 153 |
use_sliding_window=False,
|
| 154 |
-
sliding_window=
|
| 155 |
attention_bias=False,
|
| 156 |
max_window_layers=28,
|
| 157 |
attention_dropout=0.0,
|
|
|
|
| 151 |
rope_theta=10000.0,
|
| 152 |
rope_scaling=None,
|
| 153 |
use_sliding_window=False,
|
| 154 |
+
sliding_window=512,
|
| 155 |
attention_bias=False,
|
| 156 |
max_window_layers=28,
|
| 157 |
attention_dropout=0.0,
|
modeling_hinvec.py
CHANGED
|
@@ -997,7 +997,6 @@ class HinvecModel(HinvecPreTrainedModel):
|
|
| 997 |
attention_mask: Optional[torch.Tensor] = None,
|
| 998 |
position_ids: Optional[torch.LongTensor] = None,
|
| 999 |
inputs_embeds: Optional[torch.FloatTensor] = None,
|
| 1000 |
-
cls_token_id: Optional[int] = 256000, # Add CLS token ID as parameter
|
| 1001 |
**kwargs: Unpack[TransformersKwargs],
|
| 1002 |
) -> BaseModelOutputWithPast:
|
| 1003 |
if (input_ids is None) ^ (inputs_embeds is not None):
|
|
@@ -1021,9 +1020,9 @@ class HinvecModel(HinvecPreTrainedModel):
|
|
| 1021 |
global_attention_mask = torch.zeros((batch_size, seq_len), device=device, dtype=torch.long)
|
| 1022 |
|
| 1023 |
# If input_ids is available, find CLS tokens
|
| 1024 |
-
if input_ids is not None and cls_token_id is not None:
|
| 1025 |
# Mark all CLS token positions as global attention
|
| 1026 |
-
global_attention_mask = (input_ids == cls_token_id).long()
|
| 1027 |
else:
|
| 1028 |
# Default: assume first token is CLS (common convention)
|
| 1029 |
global_attention_mask[:, 0] = 1
|
|
|
|
| 997 |
attention_mask: Optional[torch.Tensor] = None,
|
| 998 |
position_ids: Optional[torch.LongTensor] = None,
|
| 999 |
inputs_embeds: Optional[torch.FloatTensor] = None,
|
|
|
|
| 1000 |
**kwargs: Unpack[TransformersKwargs],
|
| 1001 |
) -> BaseModelOutputWithPast:
|
| 1002 |
if (input_ids is None) ^ (inputs_embeds is not None):
|
|
|
|
| 1020 |
global_attention_mask = torch.zeros((batch_size, seq_len), device=device, dtype=torch.long)
|
| 1021 |
|
| 1022 |
# If input_ids is available, find CLS tokens
|
| 1023 |
+
if input_ids is not None and self.config.bos_token_id is not None:
|
| 1024 |
# Mark all CLS token positions as global attention
|
| 1025 |
+
global_attention_mask = (input_ids == self.config.bos_token_id).long()
|
| 1026 |
else:
|
| 1027 |
# Default: assume first token is CLS (common convention)
|
| 1028 |
global_attention_mask[:, 0] = 1
|