Tom Aarsen committed 22 days ago

Commit

1a5fa14

1 Parent(s): e21cde3

Integrate with upcoming Sentence Transformers v5.5.0

Files changed (28) hide show

.gitattributes +1 -0
README.md +60 -5
audio_0_WhisperEncoderTransformer/config.json +152 -0
audio_0_WhisperEncoderTransformer/model.safetensors +3 -0
audio_0_WhisperEncoderTransformer/processor_config.json +17 -0
audio_0_WhisperEncoderTransformer/sentence_bert_config.json +14 -0
audio_0_WhisperEncoderTransformer/tokenizer.json +3 -0
audio_0_WhisperEncoderTransformer/tokenizer_config.json +128 -0
audio_1_Pooling/config.json +5 -0
audio_2_Dense/config.json +8 -0
audio_2_Dense/model.safetensors +3 -0
config_sentence_transformers.json +11 -0
image_0_SiglipVisionTransformer/config.json +18 -0
image_0_SiglipVisionTransformer/model.safetensors +3 -0
image_0_SiglipVisionTransformer/preprocessor_config.json +23 -0
image_0_SiglipVisionTransformer/sentence_bert_config.json +10 -0
image_1_Pooling/config.json +5 -0
image_2_Dense/config.json +8 -0
image_2_Dense/model.safetensors +3 -0
modeling_multimodal_embed.py +51 -0
modules.json +8 -0
router_config.json +39 -0
text_0_Transformer/config.json +79 -0
text_0_Transformer/model.safetensors +3 -0
text_0_Transformer/sentence_bert_config.json +10 -0
text_0_Transformer/tokenizer.json +3 -0
text_0_Transformer/tokenizer_config.json +27 -0
text_1_Pooling/config.json +5 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 license: apache-2.0
-library_name: pytorch
 pipeline_tag: sentence-similarity
 tags:
 - sentence-transformers
@@ -84,15 +84,70 @@ Training characteristics:
 ## How To Use It
-## Installation
 ```bash
-pip install torch sentence-transformers transformers accelerate safetensors pillow librosa soundfile huggingface_hub
 ```
-## Python Usage
-The simplest way to use the model is to download the repository snapshot, load the packaged source code, and then encode one or more modality-tagged items.
 ```python
 import json

 ---
 license: apache-2.0
+library_name: sentence-transformers
 pipeline_tag: sentence-similarity
 tags:
 - sentence-transformers
 ## How To Use It
+### Using Sentence Transformers
+Install Sentence Transformers (v5.5.0 or newer) with the audio and image extras:
 ```bash
+pip install "sentence_transformers[image,audio]"
+```
+Then load the model directly. Modality is inferred automatically from the input (plain strings -> `text`, image paths/URLs/PIL images -> `image`, audio paths/URLs/NumPy arrays -> `audio`):
+```python
+from sentence_transformers import SentenceTransformer
+model = SentenceTransformer("llm-semantic-router/multi-modal-embed-large", trust_remote_code=True)
+text_embeddings = model.encode(
+    [
+        "Martin Luther King Jr. delivering his I have a dream speech",
+        "two cats sleeping side by side on a pink couch",
+    ]
+)
+image_embeddings = model.encode(
+    [
+        "http://images.cocodataset.org/val2017/000000039769.jpg",  # two cats on a pink couch
+        "http://images.cocodataset.org/val2017/000000000139.jpg",  # distractor
+    ]
+)
+audio_embeddings = model.encode(
+    [
+        "https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac",            # MLK speech
+        "https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/i-know-kung-fu.mp3",  # distractor
+    ]
+)
+print(text_embeddings.shape, image_embeddings.shape, audio_embeddings.shape)
+# (2, 768) (2, 768) (2, 768)
+# Each row is a text query, each column a media candidate; the highest score per row is the
+# correct cross-modal match.
+print(model.similarity(text_embeddings, image_embeddings))
+# tensor([[0.0704, 0.0121],   # MLK text:  neither image matches
+#         [0.5532, 0.3070]])  # cats text: the cats photo wins
+print(model.similarity(text_embeddings, audio_embeddings))
+# tensor([[ 0.2186,  0.1428],   # MLK text:  the MLK audio wins
+#         [-0.0625,  0.0667]])  # cats text: neither audio matches
 ```
+Each modality routes through the matching sub-module pipeline:
+- `text` -> `Transformer(mmbert) -> Pooling(mean) -> Normalize`
+- `image` -> `SiglipVisionTransformer -> Pooling(mean) -> Dense(1152, 768) -> Normalize`
+- `audio` -> `WhisperEncoderTransformer -> Pooling(mean) -> Dense(1024, 768) -> Normalize`
+### Using the packaged `hf_st_mm` source code
+The original packaged inference path remains available alongside the Sentence Transformers integration. Install the dependencies:
+```bash
+pip install torch sentence-transformers transformers accelerate safetensors pillow librosa soundfile huggingface_hub
+```
+Then download the repository snapshot, load the packaged source code, and encode modality-tagged items:
 ```python
 import json

audio_0_WhisperEncoderTransformer/config.json ADDED Viewed

	@@ -0,0 +1,152 @@

+{
+  "activation_dropout": 0.0,
+  "activation_function": "gelu",
+  "apply_spec_augment": false,
+  "architectures": [
+    "WhisperEncoder"
+  ],
+  "attention_dropout": 0.0,
+  "begin_suppress_tokens": [
+    220,
+    50257
+  ],
+  "bos_token_id": 50257,
+  "classifier_proj_size": 256,
+  "d_model": 1024,
+  "decoder_attention_heads": 16,
+  "decoder_ffn_dim": 4096,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 24,
+  "decoder_start_token_id": 50258,
+  "dropout": 0.0,
+  "dtype": "float32",
+  "encoder_attention_heads": 16,
+  "encoder_ffn_dim": 4096,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 24,
+  "eos_token_id": 50257,
+  "forced_decoder_ids": [
+    [
+      1,
+      50259
+    ],
+    [
+      2,
+      50359
+    ],
+    [
+      3,
+      50363
+    ]
+  ],
+  "init_std": 0.02,
+  "is_encoder_decoder": true,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "max_source_positions": 1500,
+  "max_target_positions": 448,
+  "median_filter_width": 7,
+  "model_type": "whisper",
+  "num_mel_bins": 80,
+  "pad_token_id": 50257,
+  "scale_embedding": false,
+  "suppress_tokens": [
+    1,
+    2,
+    7,
+    8,
+    9,
+    10,
+    14,
+    25,
+    26,
+    27,
+    28,
+    29,
+    31,
+    58,
+    59,
+    60,
+    61,
+    62,
+    63,
+    90,
+    91,
+    92,
+    93,
+    359,
+    503,
+    522,
+    542,
+    873,
+    893,
+    902,
+    918,
+    922,
+    931,
+    1350,
+    1853,
+    1982,
+    2460,
+    2627,
+    3246,
+    3253,
+    3268,
+    3536,
+    3846,
+    3961,
+    4183,
+    4667,
+    6585,
+    6647,
+    7273,
+    9061,
+    9383,
+    10428,
+    10929,
+    11938,
+    12033,
+    12331,
+    12562,
+    13793,
+    14157,
+    14635,
+    15265,
+    15618,
+    16553,
+    16604,
+    18362,
+    18956,
+    20075,
+    21675,
+    22520,
+    26130,
+    26161,
+    26435,
+    28279,
+    29464,
+    31650,
+    32302,
+    32470,
+    36865,
+    42863,
+    47425,
+    49870,
+    50254,
+    50258,
+    50358,
+    50359,
+    50360,
+    50361,
+    50362
+  ],
+  "tie_word_embeddings": true,
+  "transformers_version": "5.6.2",
+  "use_cache": true,
+  "use_weighted_layer_sum": false,
+  "vocab_size": 51865
+}

audio_0_WhisperEncoderTransformer/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e1159b2a18a2298f88a5a6951ade86ccbe6d6dae520fbcfe21c2336a58bcf92
+size 1228902976

audio_0_WhisperEncoderTransformer/processor_config.json ADDED Viewed

	@@ -0,0 +1,17 @@

+{
+  "feature_extractor": {
+    "chunk_length": 30,
+    "dither": 0.0,
+    "feature_extractor_type": "WhisperFeatureExtractor",
+    "feature_size": 80,
+    "hop_length": 160,
+    "n_fft": 400,
+    "n_samples": 480000,
+    "nb_max_frames": 3000,
+    "padding_side": "right",
+    "padding_value": 0.0,
+    "return_attention_mask": false,
+    "sampling_rate": 16000
+  },
+  "processor_class": "WhisperProcessor"
+}

audio_0_WhisperEncoderTransformer/sentence_bert_config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+    "transformer_task": "feature-extraction",
+    "modality_config": {
+        "audio": {
+            "method": "forward",
+            "method_output_name": "last_hidden_state"
+        },
+        "audio+text": {
+            "method": "forward",
+            "method_output_name": "last_hidden_state"
+        }
+    },
+    "module_output_name": "token_embeddings"
+}

audio_0_WhisperEncoderTransformer/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0c0be0f47d27cf2a0de40266711d8ca68ce58bafedab73fc457719fe437d2bfa
+size 4195347

audio_0_WhisperEncoderTransformer/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,128 @@

+{
+  "add_prefix_space": false,
+  "backend": "tokenizers",
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "errors": "replace",
+  "extra_special_tokens": [
+    "<|endoftext|>",
+    "<|startoftranscript|>",
+    "<|en|>",
+    "<|zh|>",
+    "<|de|>",
+    "<|es|>",
+    "<|ru|>",
+    "<|ko|>",
+    "<|fr|>",
+    "<|ja|>",
+    "<|pt|>",
+    "<|tr|>",
+    "<|pl|>",
+    "<|ca|>",
+    "<|nl|>",
+    "<|ar|>",
+    "<|sv|>",
+    "<|it|>",
+    "<|id|>",
+    "<|hi|>",
+    "<|fi|>",
+    "<|vi|>",
+    "<|he|>",
+    "<|uk|>",
+    "<|el|>",
+    "<|ms|>",
+    "<|cs|>",
+    "<|ro|>",
+    "<|da|>",
+    "<|hu|>",
+    "<|ta|>",
+    "<|no|>",
+    "<|th|>",
+    "<|ur|>",
+    "<|hr|>",
+    "<|bg|>",
+    "<|lt|>",
+    "<|la|>",
+    "<|mi|>",
+    "<|ml|>",
+    "<|cy|>",
+    "<|sk|>",
+    "<|te|>",
+    "<|fa|>",
+    "<|lv|>",
+    "<|bn|>",
+    "<|sr|>",
+    "<|az|>",
+    "<|sl|>",
+    "<|kn|>",
+    "<|et|>",
+    "<|mk|>",
+    "<|br|>",
+    "<|eu|>",
+    "<|is|>",
+    "<|hy|>",
+    "<|ne|>",
+    "<|mn|>",
+    "<|bs|>",
+    "<|kk|>",
+    "<|sq|>",
+    "<|sw|>",
+    "<|gl|>",
+    "<|mr|>",
+    "<|pa|>",
+    "<|si|>",
+    "<|km|>",
+    "<|sn|>",
+    "<|yo|>",
+    "<|so|>",
+    "<|af|>",
+    "<|oc|>",
+    "<|ka|>",
+    "<|be|>",
+    "<|tg|>",
+    "<|sd|>",
+    "<|gu|>",
+    "<|am|>",
+    "<|yi|>",
+    "<|lo|>",
+    "<|uz|>",
+    "<|fo|>",
+    "<|ht|>",
+    "<|ps|>",
+    "<|tk|>",
+    "<|nn|>",
+    "<|mt|>",
+    "<|sa|>",
+    "<|lb|>",
+    "<|my|>",
+    "<|bo|>",
+    "<|tl|>",
+    "<|mg|>",
+    "<|as|>",
+    "<|tt|>",
+    "<|haw|>",
+    "<|ln|>",
+    "<|ha|>",
+    "<|ba|>",
+    "<|jw|>",
+    "<|su|>",
+    "<|translate|>",
+    "<|transcribe|>",
+    "<|startoflm|>",
+    "<|startofprev|>",
+    "<|nocaptions|>",
+    "<|notimestamps|>"
+  ],
+  "is_local": false,
+  "language": null,
+  "local_files_only": false,
+  "model_max_length": 1024,
+  "pad_token": "<|endoftext|>",
+  "predict_timestamps": false,
+  "processor_class": "WhisperProcessor",
+  "return_attention_mask": false,
+  "task": null,
+  "tokenizer_class": "WhisperTokenizer",
+  "unk_token": "<|endoftext|>"
+}

audio_1_Pooling/config.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+    "embedding_dimension": 1024,
+    "pooling_mode": "mean",
+    "include_prompt": true
+}

audio_2_Dense/config.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "in_features": 1024,
+    "out_features": 768,
+    "bias": true,
+    "activation_function": "torch.nn.modules.linear.Identity",
+    "module_input_name": "sentence_embedding",
+    "module_output_name": "sentence_embedding"
+}

audio_2_Dense/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:722c64f37968e8a783ec7765d752b0e51d8194865a9f705812bca038f87bd7dc
+size 3148960

config_sentence_transformers.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+  "__version__": {
+    "pytorch": "2.10.0+cu128",
+    "sentence_transformers": "5.5.0",
+    "transformers": "5.6.2"
+  },
+  "default_prompt_name": null,
+  "model_type": "SentenceTransformer",
+  "prompts": {},
+  "similarity_fn_name": "cosine"
+}

image_0_SiglipVisionTransformer/config.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "architectures": [
+    "SiglipVisionModel"
+  ],
+  "attention_dropout": 0.0,
+  "dtype": "float32",
+  "hidden_act": "gelu_pytorch_tanh",
+  "hidden_size": 1152,
+  "image_size": 384,
+  "intermediate_size": 4304,
+  "layer_norm_eps": 1e-06,
+  "model_type": "siglip_vision_model",
+  "num_attention_heads": 16,
+  "num_channels": 3,
+  "num_hidden_layers": 27,
+  "patch_size": 14,
+  "transformers_version": "5.6.2"
+}

image_0_SiglipVisionTransformer/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0c441e33eea6623e252f9ec9915cf9e75b2fdd8245309456884316676d0229ac
+size 1712951472

image_0_SiglipVisionTransformer/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "do_convert_rgb": null,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "image_processor_type": "SiglipImageProcessor",
+  "image_std": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "resample": 2,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 384,
+    "width": 384
+  }
+}

image_0_SiglipVisionTransformer/sentence_bert_config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+    "transformer_task": "feature-extraction",
+    "modality_config": {
+        "image": {
+            "method": "forward",
+            "method_output_name": "last_hidden_state"
+        }
+    },
+    "module_output_name": "token_embeddings"
+}

image_1_Pooling/config.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+    "embedding_dimension": 1152,
+    "pooling_mode": "mean",
+    "include_prompt": true
+}

image_2_Dense/config.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "in_features": 1152,
+    "out_features": 768,
+    "bias": true,
+    "activation_function": "torch.nn.modules.linear.Identity",
+    "module_input_name": "sentence_embedding",
+    "module_output_name": "sentence_embedding"
+}

image_2_Dense/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:16e7e4136c4d0a4f19bec7e7d22afd16ffe1755f7143046f2b80f89d00719654
+size 3542176

modeling_multimodal_embed.py ADDED Viewed

	@@ -0,0 +1,51 @@

+from __future__ import annotations
+from typing import Any
+from torch import Tensor
+from transformers import PretrainedConfig, PreTrainedModel
+from sentence_transformers.base.modules.transformer import Transformer
+class SiglipVisionTransformer(Transformer):
+    """Drop-in :class:`Transformer` subclass that exposes only the SigLIP vision tower."""
+    def __init__(self, model_name_or_path: str, **kwargs: Any) -> None:
+        super().__init__(model_name_or_path, **kwargs)
+        # Drop the unused SigLIP text tokenizer (~17 MB) from the saved layout.
+        if hasattr(self.processor, "image_processor"):
+            self.processor = self.processor.image_processor
+    def _load_model(
+        self,
+        model_name_or_path: str,
+        transformer_task: str,
+        config: PretrainedConfig,
+        backend: str,
+        is_peft_model: bool,
+        **model_kwargs: Any,
+    ) -> PreTrainedModel:
+        full_model = super()._load_model(
+            model_name_or_path, transformer_task, config, backend, is_peft_model, **model_kwargs
+        )
+        # getattr keeps the vision tower on fresh init; on reload it's already SiglipVisionModel.
+        return getattr(full_model, "vision_model", full_model)
+    def forward(self, features: dict[str, Tensor], **kwargs: Any) -> dict[str, Tensor]:
+        features = super().forward(features, **kwargs)
+        # Drop the first patch token to match training-time pooling.
+        features["token_embeddings"] = features["token_embeddings"][:, 1:]
+        return features
+class WhisperEncoderTransformer(Transformer):
+    """Drop-in :class:`Transformer` subclass that decodes audio file paths/URLs into waveforms."""
+    def preprocess(
+        self, inputs: list[Any], prompt: str | None = None, **kwargs: Any
+    ) -> dict[str, Tensor]:
+        from transformers.audio_utils import load_audio
+        loaded = [load_audio(item) if isinstance(item, str) else item for item in inputs]
+        return super().preprocess(loaded, prompt=prompt, **kwargs)

modules.json ADDED Viewed

	@@ -0,0 +1,8 @@

+[
+  {
+    "idx": 0,
+    "name": "0",
+    "path": "",
+    "type": "sentence_transformers.base.modules.router.Router"
+  }
+]

router_config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+    "types": {
+        "text_0_Transformer": "sentence_transformers.base.modules.transformer.Transformer",
+        "text_1_Pooling": "sentence_transformers.sentence_transformer.modules.pooling.Pooling",
+        "text_2_Normalize": "sentence_transformers.sentence_transformer.modules.normalize.Normalize",
+        "image_0_SiglipVisionTransformer": "modeling_multimodal_embed.SiglipVisionTransformer",
+        "image_1_Pooling": "sentence_transformers.sentence_transformer.modules.pooling.Pooling",
+        "image_2_Dense": "sentence_transformers.base.modules.dense.Dense",
+        "image_3_Normalize": "sentence_transformers.sentence_transformer.modules.normalize.Normalize",
+        "audio_0_WhisperEncoderTransformer": "modeling_multimodal_embed.WhisperEncoderTransformer",
+        "audio_1_Pooling": "sentence_transformers.sentence_transformer.modules.pooling.Pooling",
+        "audio_2_Dense": "sentence_transformers.base.modules.dense.Dense",
+        "audio_3_Normalize": "sentence_transformers.sentence_transformer.modules.normalize.Normalize"
+    },
+    "structure": {
+        "text": [
+            "text_0_Transformer",
+            "text_1_Pooling",
+            "text_2_Normalize"
+        ],
+        "image": [
+            "image_0_SiglipVisionTransformer",
+            "image_1_Pooling",
+            "image_2_Dense",
+            "image_3_Normalize"
+        ],
+        "audio": [
+            "audio_0_WhisperEncoderTransformer",
+            "audio_1_Pooling",
+            "audio_2_Dense",
+            "audio_3_Normalize"
+        ]
+    },
+    "parameters": {
+        "default_route": "text",
+        "allow_empty_key": true,
+        "route_mappings": {}
+    }
+}

text_0_Transformer/config.json ADDED Viewed

	@@ -0,0 +1,79 @@

+{
+  "architectures": [
+    "ModernBertModel"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 2,
+  "classifier_activation": "gelu",
+  "classifier_bias": false,
+  "classifier_dropout": 0.0,
+  "classifier_pooling": "mean",
+  "cls_token_id": 1,
+  "decoder_bias": true,
+  "deterministic_flash_attn": false,
+  "dtype": "bfloat16",
+  "embedding_dropout": 0.0,
+  "eos_token_id": 1,
+  "global_attn_every_n_layers": 3,
+  "gradient_checkpointing": false,
+  "hidden_activation": "gelu",
+  "hidden_size": 768,
+  "initializer_cutoff_factor": 2.0,
+  "initializer_range": 0.02,
+  "intermediate_size": 1152,
+  "layer_norm_eps": 1e-05,
+  "layer_types": [
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention"
+  ],
+  "local_attention": 128,
+  "mask_token_id": 4,
+  "max_position_embeddings": 32768,
+  "mlp_bias": false,
+  "mlp_dropout": 0.0,
+  "model_type": "modernbert",
+  "norm_bias": false,
+  "norm_eps": 1e-05,
+  "num_attention_heads": 12,
+  "num_hidden_layers": 22,
+  "pad_token_id": 0,
+  "position_embedding_type": "sans_pos",
+  "repad_logits_with_grad": false,
+  "rope_parameters": {
+    "full_attention": {
+      "rope_theta": 160000,
+      "rope_type": "default"
+    },
+    "sliding_attention": {
+      "rope_theta": 160000,
+      "rope_type": "default"
+    }
+  },
+  "sep_token_id": 1,
+  "sparse_pred_ignore_index": -100,
+  "sparse_prediction": false,
+  "tie_word_embeddings": true,
+  "transformers_version": "5.6.2",
+  "vocab_size": 256000
+}

text_0_Transformer/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:45714faf6758d630a8d7b4a57bd288632ec40172d92490e89534e404207857da
+size 613892480

text_0_Transformer/sentence_bert_config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+    "transformer_task": "feature-extraction",
+    "modality_config": {
+        "text": {
+            "method": "forward",
+            "method_output_name": "last_hidden_state"
+        }
+    },
+    "module_output_name": "token_embeddings"
+}

text_0_Transformer/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c925c6b3dbd208644702bb1856672fa7315c6158a753d629470fcf3724ad284c
+size 36944238

text_0_Transformer/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,27 @@

+{
+  "backend": "tokenizers",
+  "bos_token": "<bos>",
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "<bos>",
+  "eos_token": "<eos>",
+  "is_local": false,
+  "local_files_only": false,
+  "mask_token": "<mask>",
+  "max_length": 32768,
+  "model_input_names": [
+    "input_ids",
+    "attention_mask"
+  ],
+  "model_max_length": 32768,
+  "pad_to_multiple_of": null,
+  "pad_token": "<pad>",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "sep_token": "<eos>",
+  "spaces_between_special_tokens": false,
+  "stride": 0,
+  "tokenizer_class": "TokenizersBackend",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "<unk>"
+}

text_1_Pooling/config.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+    "embedding_dimension": 768,
+    "pooling_mode": "mean",
+    "include_prompt": true
+}