This is a GGUF conversion of an InternVL3.5 merge based on the GPT-OSS-20B model. It required a few adjustments to the conversion script:

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index eb43520f9..50c4c167a 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -972,11 +972,32 @@ class TextModel(ModelBase):
         from transformers import AutoTokenizer
         tokenizer = AutoTokenizer.from_pretrained(self.dir_model)
         vocab_size = self.hparams.get("vocab_size", len(tokenizer.vocab))
-        assert max(tokenizer.vocab.values()) < vocab_size
+        if tokenizer.vocab:
+            max_vocab_id = max(tokenizer.vocab.values())
+            if max_vocab_id >= vocab_size:
+                added_vocab = tokenizer.get_added_vocab()
+                added_oob = [tok for tok, tok_id in added_vocab.items() if tok_id >= vocab_size]
+                if added_oob and "vocab_size" in self.hparams:
+                    logger.warning(
+                        "Tokenizer has added tokens with ids >= model vocab_size; "
+                        "these will be ignored. vocab_size=%d, max_token_id=%d, example_added_oob=%s",
+                        vocab_size,
+                        max_vocab_id,
+                        added_oob[:3],
+                    )
+                else:
+                    if "vocab_size" in self.hparams:
+                        logger.warning(
+                            "Tokenizer vocab max id (%d) >= hparams vocab_size (%d); "
+                            "expanding vocab to fit tokenizer base vocab.",
+                            max_vocab_id,
+                            vocab_size,
+                        )
+                    vocab_size = max_vocab_id + 1
 
         tokpre = self.get_vocab_base_pre(tokenizer)
 
-        reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in tokenizer.vocab.items()}
+        reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in tokenizer.vocab.items() if id_ < vocab_size}
         added_vocab = tokenizer.get_added_vocab()
 
         added_tokens_decoder = tokenizer.added_tokens_decoder
@@ -10063,6 +10084,16 @@ class GptOssModel(TextModel):
         return []
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        if name.startswith("language_model.model."):
+            name = "model." + name.removeprefix("language_model.model.")
+        elif name.startswith("language_model."):
+            name = name.removeprefix("language_model.")
+
+        if name.startswith("multi_modal_projector.") or name.startswith("vision_tower.") \
+            or name.startswith("multimodal_projector.") or name.startswith("vision_model.") \
+            or name.startswith("mlp1."):
+            return
+
         if "sinks" in name:
             name += ".weight"
 
Downloads last month
693
GGUF
Model size
21B params
Architecture
gpt-oss
Hardware compatibility
Log In to add your hardware

4-bit

16-bit

Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support

Model tree for aviallon/InternVL3_5-GPT-OSS-20B-A4B-Preview-GGUF

Quantized
(2)
this model