This is a GGUF conversion of an InternVL3.5 merge based on the GPT-OSS-20B model. The conversion required a few adjustments to the `convert_hf_to_gguf.py` script:
diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index eb43520f9..50c4c167a 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -972,11 +972,32 @@ class TextModel(ModelBase):
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(self.dir_model)
vocab_size = self.hparams.get("vocab_size", len(tokenizer.vocab))
- assert max(tokenizer.vocab.values()) < vocab_size
+ if tokenizer.vocab:
+ max_vocab_id = max(tokenizer.vocab.values())
+ if max_vocab_id >= vocab_size:
+ added_vocab = tokenizer.get_added_vocab()
+ added_oob = [tok for tok, tok_id in added_vocab.items() if tok_id >= vocab_size]
+ if added_oob and "vocab_size" in self.hparams:
+ logger.warning(
+ "Tokenizer has added tokens with ids >= model vocab_size; "
+ "these will be ignored. vocab_size=%d, max_token_id=%d, example_added_oob=%s",
+ vocab_size,
+ max_vocab_id,
+ added_oob[:3],
+ )
+ else:
+ if "vocab_size" in self.hparams:
+ logger.warning(
+ "Tokenizer vocab max id (%d) >= hparams vocab_size (%d); "
+ "expanding vocab to fit tokenizer base vocab.",
+ max_vocab_id,
+ vocab_size,
+ )
+ vocab_size = max_vocab_id + 1
tokpre = self.get_vocab_base_pre(tokenizer)
- reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in tokenizer.vocab.items()}
+ reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in tokenizer.vocab.items() if id_ < vocab_size}
added_vocab = tokenizer.get_added_vocab()
added_tokens_decoder = tokenizer.added_tokens_decoder
@@ -10063,6 +10084,16 @@ class GptOssModel(TextModel):
return []
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+ if name.startswith("language_model.model."):
+ name = "model." + name.removeprefix("language_model.model.")
+ elif name.startswith("language_model."):
+ name = name.removeprefix("language_model.")
+
+ if name.startswith("multi_modal_projector.") or name.startswith("vision_tower.") \
+ or name.startswith("multimodal_projector.") or name.startswith("vision_model.") \
+ or name.startswith("mlp1."):
+ return
+
if "sinks" in name:
name += ".weight"
Downloads last month: 693
Hardware compatibility
Log In to add your hardware