KennethEnevoldsen committed on
Commit
f31bf4b
·
verified ·
1 Parent(s): 746398b

Create models.py

Browse files
Files changed (1) hide show
  1. models.py +688 -0
models.py ADDED
@@ -0,0 +1,688 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """This file is used to store the list of all models that are used in the MTEB benchmark. It is generated by running this script.
2
+
3
+ It is intended to be used as a reference for the models that are used in the benchmark, and it is used to link the model to the benchmark so
4
+ that it is easier to see that we use the model. Discussed in this issue:
5
+ https://github.com/embeddings-benchmark/mteb/issues/4309
6
+ """
7
+
8
+ from pathlib import Path
9
+
10
+ import mteb
11
+
12
# Path of this very script; the generated list is written back into it.
path_to_self = Path(__file__)
# Result of mteb.get_model_metas(); each entry is expected to expose `.name`.
models = mteb.get_model_metas()

# Model names that must be left out of the generated list.
unused = ["org/model_name"]

# Names of all models that are not explicitly excluded above.
model_names = [meta.name for meta in models if meta.name not in unused]
19
+
20
+
21
def format_list_as_python_code(lst):
    """Render the items of ``lst`` as the source text of a ``models`` list.

    The returned text opens with ``models = [``, then has one line per item
    (the item wrapped in double quotes and followed by a comma), and closes
    with ``]`` and a trailing newline. Items are interpolated verbatim; no
    quoting or escaping is applied to their content.

    Args:
        lst: Iterable of model names to render.

    Returns:
        The generated source text as a single string.
    """
    # NOTE(review): the one-space indent in front of each entry is reproduced
    # exactly as it appears in the source under review; confirm it is the
    # intended indentation for the generated list.
    entry_lines = [f' "{entry}",\n' for entry in lst]
    return "".join(["models = [\n", *entry_lines, "]\n"])
39
+
40
+
41
def insert_into_self(formatted_list):
    """Write ``formatted_list`` into this file between the INSERT markers.

    The file at ``path_to_self`` is read, and everything from the
    ``INSERT START`` comment line up to and including the ``INSERT END``
    comment line is replaced by the start marker, ``formatted_list``, a blank
    separator line and the end marker. Unlike a plain replace of two adjacent
    marker lines, this also works when a previously generated list already
    sits between the markers, so the script can be re-run to refresh the list.

    Args:
        formatted_list: Source text to place between the two marker lines.

    Raises:
        ValueError: If no marker pair is found in the file; the file is left
            untouched in that case.
    """
    import re  # local import so the block does not depend on file-level imports

    # Both markers are anchored to the start of a line so that the marker text
    # quoted (and indented) inside this function body can never match.
    marker_block = re.compile(
        r"^# INSERT START\n.*?^# INSERT END$",
        flags=re.MULTILINE | re.DOTALL,
    )

    with path_to_self.open("r", encoding="utf-8") as f:
        content = f.read()

    replacement = f"# INSERT START\n{formatted_list}\n# INSERT END"
    # A callable replacement keeps backslashes in the payload literal.
    new_content, n_replaced = marker_block.subn(
        lambda _match: replacement, content, count=1
    )
    if n_replaced == 0:
        raise ValueError(
            f"Could not find the INSERT START / INSERT END markers in {path_to_self}"
        )

    with path_to_self.open("w", encoding="utf-8") as f:
        f.write(new_content)
53
+
54
+
55
# Render the collected names as source text, write that text back into this
# file, and report how many names were handled.
formatted_list = format_list_as_python_code(model_names)
insert_into_self(formatted_list)
print(f"Inserted {len(model_names)} models into {path_to_self}")
58
+
59
+ # INSERT START
60
+ models = [
61
+ "Snowflake/snowflake-arctic-embed-l",
62
+ "Snowflake/snowflake-arctic-embed-l-v2.0",
63
+ "Snowflake/snowflake-arctic-embed-m",
64
+ "Snowflake/snowflake-arctic-embed-m-long",
65
+ "Snowflake/snowflake-arctic-embed-m-v1.5",
66
+ "Snowflake/snowflake-arctic-embed-m-v2.0",
67
+ "Snowflake/snowflake-arctic-embed-s",
68
+ "Snowflake/snowflake-arctic-embed-xs",
69
+ "mteb/baseline-bm25s",
70
+ "zeroentropy/zembed-1",
71
+ "zeroentropy/zerank-1",
72
+ "zeroentropy/zerank-1-small",
73
+ "zeroentropy/zerank-2",
74
+ "google/vggish",
75
+ "Omartificial-Intelligence-Space/Arabic-Triplet-Matryoshka-V2",
76
+ "Kingsoft-LLM/QZhou-Embedding",
77
+ "Kingsoft-LLM/QZhou-Embedding-Zh",
78
+ "samaya-ai/promptriever-llama2-7b-v1",
79
+ "samaya-ai/promptriever-llama3.1-8b-v1",
80
+ "samaya-ai/promptriever-llama3.1-8b-instruct-v1",
81
+ "samaya-ai/promptriever-mistral-v0.1-7b-v1",
82
+ "sbintuitions/sarashina-embedding-v1-1b",
83
+ "sbintuitions/sarashina-embedding-v2-1b",
84
+ "mixedbread-ai/mxbai-edge-colbert-v0-17m",
85
+ "mixedbread-ai/mxbai-edge-colbert-v0-32m",
86
+ "mixedbread-ai/mxbai-embed-2d-large-v1",
87
+ "mixedbread-ai/mxbai-embed-large-v1",
88
+ "mixedbread-ai/mxbai-embed-xsmall-v1",
89
+ "mixedbread-ai/mxbai-rerank-base-v1",
90
+ "mixedbread-ai/mxbai-rerank-large-v1",
91
+ "mixedbread-ai/mxbai-rerank-xsmall-v1",
92
+ "vidore/colpali-v1.1",
93
+ "vidore/colpali-v1.2",
94
+ "vidore/colpali-v1.3",
95
+ "voyageai/voyage-multimodal-3",
96
+ "eagerworks/eager-embed-v1",
97
+ "stephantulkens/NIFE-gte-modernbert-base_as_router",
98
+ "stephantulkens/NIFE-mxbai-embed-large-v1_as_router",
99
+ "Bytedance/Seed1.6-embedding",
100
+ "microsoft/LLM2CLIP-Openai-B-16",
101
+ "microsoft/LLM2CLIP-Openai-L-14-224",
102
+ "microsoft/LLM2CLIP-Openai-L-14-336",
103
+ "FacebookAI/xlm-roberta-base",
104
+ "FacebookAI/xlm-roberta-large",
105
+ "codefuse-ai/C2LLM-0.5B",
106
+ "codefuse-ai/C2LLM-7B",
107
+ "codefuse-ai/F2LLM-0.6B",
108
+ "codefuse-ai/F2LLM-1.7B",
109
+ "codefuse-ai/F2LLM-4B",
110
+ "codefuse-ai/F2LLM-v2-0.6B",
111
+ "codefuse-ai/F2LLM-v2-14B",
112
+ "codefuse-ai/F2LLM-v2-160M",
113
+ "codefuse-ai/F2LLM-v2-1.7B",
114
+ "codefuse-ai/F2LLM-v2-330M",
115
+ "codefuse-ai/F2LLM-v2-4B",
116
+ "codefuse-ai/F2LLM-v2-80M",
117
+ "codefuse-ai/F2LLM-v2-8B",
118
+ "ibm-granite/granite-vision-3.3-2b-embedding",
119
+ "openai/clip-vit-base-patch16",
120
+ "openai/clip-vit-base-patch32",
121
+ "openai/clip-vit-large-patch14",
122
+ "shibing624/text2vec-base-chinese",
123
+ "shibing624/text2vec-base-chinese-paraphrase",
124
+ "shibing624/text2vec-base-multilingual",
125
+ "LCO-Embedding/LCO-Embedding-Omni-3B",
126
+ "LCO-Embedding/LCO-Embedding-Omni-7B",
127
+ "kakaobrain/align-base",
128
+ "IEITYuan/Yuan-embedding-2.0-en",
129
+ "facebook/metaclip-2-mt5-worldwide-b32",
130
+ "dmedhi/PawanEmbd-68M",
131
+ "BAAI/bge-reranker-v2-m3",
132
+ "jinaai/jina-reranker-v2-base-multilingual",
133
+ "cross-encoder/ms-marco-MiniLM-L12-v2",
134
+ "cross-encoder/ms-marco-MiniLM-L2-v2",
135
+ "cross-encoder/ms-marco-MiniLM-L4-v2",
136
+ "cross-encoder/ms-marco-MiniLM-L6-v2",
137
+ "cross-encoder/ms-marco-TinyBERT-L2-v2",
138
+ "emillykkejensen/EmbeddingGemma-Scandi-300m",
139
+ "emillykkejensen/mmBERTscandi-base-embedding",
140
+ "emillykkejensen/Qwen3-Embedding-Scandi-0.6B",
141
+ "BAAI/bge-base-en",
142
+ "BAAI/bge-base-en-v1.5",
143
+ "BAAI/bge-base-zh",
144
+ "BAAI/bge-base-zh-v1.5",
145
+ "BAAI/bge-en-icl",
146
+ "BAAI/bge-large-en",
147
+ "BAAI/bge-large-en-v1.5",
148
+ "BAAI/bge-large-zh",
149
+ "BAAI/bge-large-zh-v1.5",
150
+ "BAAI/bge-m3",
151
+ "BAAI/bge-m3-unsupervised",
152
+ "BAAI/bge-multilingual-gemma2",
153
+ "BAAI/bge-small-en",
154
+ "BAAI/bge-small-en-v1.5",
155
+ "BAAI/bge-small-zh",
156
+ "BAAI/bge-small-zh-v1.5",
157
+ "manu/bge-m3-custom-fr",
158
+ "spartan8806/atles-champion-embedding",
159
+ "prdev/mini-gte",
160
+ "SamilPwC-AXNode-GenAI/PwC-Embedding_expr",
161
+ "m3hrdadfi/bert-zwnj-wnli-mean-tokens",
162
+ "sbunlp/fabert",
163
+ "HooshvareLab/bert-base-parsbert-uncased",
164
+ "m3hrdadfi/roberta-zwnj-wnli-mean-tokens",
165
+ "myrkur/sentence-transformer-parsbert-fa",
166
+ "PartAI/TookaBERT-Base",
167
+ "PartAI/Tooka-SBERT",
168
+ "PartAI/Tooka-SBERT-V2-Large",
169
+ "PartAI/Tooka-SBERT-V2-Small",
170
+ "castorini/repllama-v1-7b-lora-passage",
171
+ "samaya-ai/RepLLaMA-reproduced",
172
+ "nomic-ai/nomic-embed-code",
173
+ "nomic-ai/nomic-embed-text-v2-moe",
174
+ "nomic-ai/nomic-embed-text-v1",
175
+ "nomic-ai/nomic-embed-text-v1.5",
176
+ "nomic-ai/nomic-embed-text-v1-ablated",
177
+ "nomic-ai/nomic-embed-text-v1-unsupervised",
178
+ "nomic-ai/modernbert-embed-base",
179
+ "nomic-ai/nomic-embed-vision-v1.5",
180
+ "bflhc/MoD-Embedding",
181
+ "ReasonIR/ReasonIR-8B",
182
+ "yibinlei/LENS-d4000",
183
+ "yibinlei/LENS-d8000",
184
+ "facebook/dinov2-base",
185
+ "facebook/dinov2-giant",
186
+ "facebook/dinov2-large",
187
+ "facebook/dinov2-small",
188
+ "facebook/webssl-dino1b-full2b-224",
189
+ "facebook/webssl-dino2b-full2b-224",
190
+ "facebook/webssl-dino2b-heavy2b-224",
191
+ "facebook/webssl-dino2b-light2b-224",
192
+ "facebook/webssl-dino300m-full2b-224",
193
+ "facebook/webssl-dino3b-full2b-224",
194
+ "facebook/webssl-dino3b-heavy2b-224",
195
+ "facebook/webssl-dino3b-light2b-224",
196
+ "facebook/webssl-dino5b-full2b-224",
197
+ "facebook/webssl-dino7b-full8b-224",
198
+ "facebook/webssl-dino7b-full8b-378",
199
+ "facebook/webssl-dino7b-full8b-518",
200
+ "facebook/webssl-mae1b-full2b-224",
201
+ "facebook/webssl-mae300m-full2b-224",
202
+ "facebook/webssl-mae700m-full2b-224",
203
+ "TencentBAC/Conan-embedding-v2",
204
+ "Gameselo/STS-multilingual-mpnet-base-v2",
205
+ "Haon-Chen/speed-embedding-7b-instruct",
206
+ "Hum-Works/lodestone-base-4096-v1",
207
+ "Jaume/gemma-2b-embeddings",
208
+ "Lajavaness/bilingual-embedding-base",
209
+ "Lajavaness/bilingual-embedding-large",
210
+ "Lajavaness/bilingual-embedding-small",
211
+ "Mihaiii/Bulbasaur",
212
+ "Mihaiii/Ivysaur",
213
+ "Mihaiii/Squirtle",
214
+ "Mihaiii/Venusaur",
215
+ "Mihaiii/Wartortle",
216
+ "Mihaiii/gte-micro",
217
+ "Mihaiii/gte-micro-v4",
218
+ "Omartificial-Intelligence-Space/Arabert-all-nli-triplet-Matryoshka",
219
+ "Omartificial-Intelligence-Space/Arabic-MiniLM-L12-v2-all-nli-triplet",
220
+ "Omartificial-Intelligence-Space/Arabic-all-nli-triplet-Matryoshka",
221
+ "Omartificial-Intelligence-Space/Arabic-labse-Matryoshka",
222
+ "Omartificial-Intelligence-Space/Arabic-mpnet-base-all-nli-triplet",
223
+ "Omartificial-Intelligence-Space/Marbert-all-nli-triplet-Matryoshka",
224
+ "OrdalieTech/Solon-embeddings-large-0.1",
225
+ "aari1995/German_Semantic_STS_V2",
226
+ "abhinand/MedEmbed-small-v0.1",
227
+ "avsolatorio/GIST-all-MiniLM-L6-v2",
228
+ "avsolatorio/GIST-Embedding-v0",
229
+ "avsolatorio/GIST-large-Embedding-v0",
230
+ "avsolatorio/GIST-small-Embedding-v0",
231
+ "avsolatorio/NoInstruct-small-Embedding-v0",
232
+ "bigscience/sgpt-bloom-7b1-msmarco",
233
+ "brahmairesearch/slx-v0.1",
234
+ "TencentBAC/Conan-embedding-v1",
235
+ "consciousAI/cai-lunaris-text-embeddings",
236
+ "consciousAI/cai-stellaris-text-embeddings",
237
+ "deepfile/embedder-100p",
238
+ "DMetaSoul/Dmeta-embedding-zh-small",
239
+ "dwzhu/e5-base-4k",
240
+ "llmrails/ember-v1",
241
+ "infgrad/stella-base-en-v2",
242
+ "izhx/udever-bloom-1b1",
243
+ "izhx/udever-bloom-3b",
244
+ "izhx/udever-bloom-560m",
245
+ "izhx/udever-bloom-7b1",
246
+ "malenia1/ternary-weight-embedding",
247
+ "manu/sentence_croissant_alpha_v0.2",
248
+ "manu/sentence_croissant_alpha_v0.3",
249
+ "manu/sentence_croissant_alpha_v0.4",
250
+ "omarelshehy/arabic-english-sts-matryoshka",
251
+ "openbmb/MiniCPM-Embedding",
252
+ "DMetaSoul/sbert-chinese-general-v1",
253
+ "sdadas/mmlw-e5-base",
254
+ "sdadas/mmlw-e5-large",
255
+ "sdadas/mmlw-e5-small",
256
+ "sdadas/mmlw-roberta-base",
257
+ "sdadas/mmlw-roberta-large",
258
+ "silma-ai/silma-embeddding-matryoshka-v0.1",
259
+ "thenlper/gte-base",
260
+ "thenlper/gte-large",
261
+ "thenlper/gte-small",
262
+ "lier007/xiaobu-embedding",
263
+ "lier007/xiaobu-embedding-v2",
264
+ "Classical/Yinka",
265
+ "Kowshik24/bangla-sentence-transformer-ft-matryoshka-paraphrase-multilingual-mpnet-base-v2",
266
+ "facebook/wav2vec2-base",
267
+ "facebook/wav2vec2-base-960h",
268
+ "facebook/wav2vec2-large",
269
+ "facebook/wav2vec2-large-xlsr-53",
270
+ "facebook/wav2vec2-lv-60-espeak-cv-ft",
271
+ "facebook/wav2vec2-xls-r-1b",
272
+ "facebook/wav2vec2-xls-r-2b",
273
+ "facebook/wav2vec2-xls-r-2b-21-to-en",
274
+ "facebook/wav2vec2-xls-r-300m",
275
+ "vitouphy/wav2vec2-xls-r-300m-phoneme",
276
+ "laion/clap-htsat-fused",
277
+ "laion/clap-htsat-unfused",
278
+ "laion/larger_clap_general",
279
+ "laion/larger_clap_music",
280
+ "laion/larger_clap_music_and_speech",
281
+ "colbert-ir/colbertv2.0",
282
+ "jinaai/jina-colbert-v2",
283
+ "lightonai/ColBERT-Zero",
284
+ "lightonai/ColBERT-Zero-supervised",
285
+ "lightonai/ColBERT-Zero-unsupervised",
286
+ "lightonai/GTE-ModernColBERT-v1",
287
+ "lightonai/LateOn-Code",
288
+ "lightonai/LateOn-Code-edge",
289
+ "lightonai/LateOn-Code-edge-pretrain",
290
+ "lightonai/LateOn-Code-pretrain",
291
+ "lightonai/Reason-ModernColBERT",
292
+ "OpenSearch-AI/Ops-Colqwen3-4B",
293
+ "Alibaba-NLP/gte-base-en-v1.5",
294
+ "thenlper/gte-base-zh",
295
+ "thenlper/gte-large-zh",
296
+ "Alibaba-NLP/gte-modernbert-base",
297
+ "Alibaba-NLP/gte-multilingual-base",
298
+ "Alibaba-NLP/gte-Qwen1.5-7B-instruct",
299
+ "Alibaba-NLP/gte-Qwen2-1.5B-instruct",
300
+ "Alibaba-NLP/gte-Qwen2-7B-instruct",
301
+ "thenlper/gte-small-zh",
302
+ "jinaai/jina-clip-v1",
303
+ "jinaai/jina-clip-v2",
304
+ "intfloat/e5-base",
305
+ "intfloat/e5-base-v2",
306
+ "intfloat/e5-large-v2",
307
+ "intfloat/e5-small",
308
+ "intfloat/e5-small-v2",
309
+ "intfloat/e5-large",
310
+ "intfloat/multilingual-e5-base",
311
+ "intfloat/multilingual-e5-large",
312
+ "intfloat/multilingual-e5-small",
313
+ "tencent/Youtu-Embedding",
314
+ "moka-ai/m3e-base",
315
+ "moka-ai/m3e-large",
316
+ "moka-ai/m3e-small",
317
+ "Bytedance/Seed1.6-embedding-1215",
318
+ "QuanSun/EVA02-CLIP-B-16",
319
+ "QuanSun/EVA02-CLIP-L-14",
320
+ "QuanSun/EVA02-CLIP-bigE-14",
321
+ "QuanSun/EVA02-CLIP-bigE-14-plus",
322
+ "Salesforce/blip-image-captioning-base",
323
+ "Salesforce/blip-image-captioning-large",
324
+ "Salesforce/blip-itm-base-coco",
325
+ "Salesforce/blip-itm-base-flickr",
326
+ "Salesforce/blip-itm-large-coco",
327
+ "Salesforce/blip-itm-large-flickr",
328
+ "Salesforce/blip-vqa-base",
329
+ "Salesforce/blip-vqa-capfilt-large",
330
+ "Salesforce/SFR-Embedding-2_R",
331
+ "Salesforce/SFR-Embedding-Code-2B_R",
332
+ "Salesforce/SFR-Embedding-Mistral",
333
+ "Cohere/Cohere-embed-v4.0",
334
+ "Cohere/Cohere-embed-v4.0 (output_dtype=binary)",
335
+ "Cohere/Cohere-embed-v4.0 (output_dtype=int8)",
336
+ "cohere/embed-english-v3.0",
337
+ "cohere/embed-multilingual-v3.0",
338
+ "jinaai/jina-embedding-b-en-v1",
339
+ "jinaai/jina-embedding-s-en-v1",
340
+ "jinaai/jina-embeddings-v2-base-en",
341
+ "jinaai/jina-embeddings-v2-small-en",
342
+ "jinaai/jina-embeddings-v3",
343
+ "jinaai/jina-embeddings-v4",
344
+ "jinaai/jina-embeddings-v5-text-nano",
345
+ "jinaai/jina-embeddings-v5-text-small",
346
+ "jinaai/jina-reranker-v3",
347
+ "lyrebird/wav2clip",
348
+ "microsoft/msclap-2022",
349
+ "microsoft/msclap-2023",
350
+ "voyageai/voyage-2",
351
+ "voyageai/voyage-3",
352
+ "voyageai/voyage-3.5",
353
+ "voyageai/voyage-3.5 (output_dtype=binary)",
354
+ "voyageai/voyage-3.5 (output_dtype=int8)",
355
+ "voyageai/voyage-3-m-exp",
356
+ "voyageai/voyage-3-large",
357
+ "voyageai/voyage-3-lite",
358
+ "voyageai/voyage-4",
359
+ "voyageai/voyage-4-large",
360
+ "voyageai/voyage-4-large (embed_dim=2048)",
361
+ "voyageai/voyage-4-lite",
362
+ "voyageai/voyage-4-nano",
363
+ "voyageai/voyage-code-2",
364
+ "voyageai/voyage-code-3",
365
+ "voyageai/voyage-finance-2",
366
+ "voyageai/voyage-large-2",
367
+ "voyageai/voyage-large-2-instruct",
368
+ "voyageai/voyage-law-2",
369
+ "voyageai/voyage-multilingual-2",
370
+ "microsoft/unispeech-sat-base-100h-libri-ft",
371
+ "MongoDB/mdbr-leaf-ir",
372
+ "MongoDB/mdbr-leaf-mt",
373
+ "KennethEnevoldsen/dfm-sentence-encoder-large",
374
+ "KennethEnevoldsen/dfm-sentence-encoder-medium",
375
+ "microsoft/wavlm-base",
376
+ "microsoft/wavlm-base-plus",
377
+ "microsoft/wavlm-base-plus-sd",
378
+ "microsoft/wavlm-base-plus-sv",
379
+ "microsoft/wavlm-base-sd",
380
+ "microsoft/wavlm-base-sv",
381
+ "microsoft/wavlm-large",
382
+ "jxm/cde-small-v1",
383
+ "jxm/cde-small-v2",
384
+ "Sailesh97/Hinvec",
385
+ "w601sxs/b1ade-embed",
386
+ "google/flan-t5-base",
387
+ "google/flan-t5-large",
388
+ "google/flan-t5-xl",
389
+ "google/flan-t5-xxl",
390
+ "jhu-clsp/FollowIR-7B",
391
+ "meta-llama/Llama-2-7b-hf",
392
+ "meta-llama/Llama-2-7b-chat-hf",
393
+ "mistralai/Mistral-7B-Instruct-v0.2",
394
+ "castorini/monot5-3b-msmarco-10k",
395
+ "castorini/monot5-base-msmarco-10k",
396
+ "castorini/monot5-large-msmarco-10k",
397
+ "castorini/monot5-small-msmarco-10k",
398
+ "unicamp-dl/mt5-base-mmarco-v2",
399
+ "facebook/seamless-m4t-v2-large",
400
+ "Alibaba-NLP/gme-Qwen2-VL-2B-Instruct",
401
+ "Alibaba-NLP/gme-Qwen2-VL-7B-Instruct",
402
+ "fangxq/XYZ-embedding",
403
+ "rasgaard/m2v-dfm-large",
404
+ "bedrock/amazon-titan-embed-text-v1",
405
+ "bedrock/amazon-titan-embed-text-v2",
406
+ "bedrock/cohere-embed-english-v3",
407
+ "bedrock/cohere-embed-multilingual-v3",
408
+ "Cohere/Cohere-embed-english-v3.0",
409
+ "Cohere/Cohere-embed-english-light-v3.0",
410
+ "Cohere/Cohere-embed-multilingual-v3.0",
411
+ "Cohere/Cohere-embed-multilingual-light-v3.0",
412
+ "bisectgroup/BiCA-base",
413
+ "Qodo/Qodo-Embed-1-1.5B",
414
+ "Qodo/Qodo-Embed-1-7B",
415
+ "WhereIsAI/UAE-Large-V1",
416
+ "GeoGPT-Research-Project/GeoEmbedding",
417
+ "nanovdr/NanoVDR-S-Multi",
418
+ "infgrad/stella-base-zh-v3-1792d",
419
+ "NovaSearch/stella_en_1.5B_v5",
420
+ "NovaSearch/stella_en_400M_v5",
421
+ "dunzhang/stella-large-zh-v3-1792d",
422
+ "dunzhang/stella-mrl-large-zh-v3.5-1792d",
423
+ "iampanda/zpoint_large_embedding_zh",
424
+ "sensenova/piccolo-base-zh",
425
+ "sensenova/piccolo-large-zh-v2",
426
+ "facebook/SONAR",
427
+ "Qwen/Qwen2-Audio-7B",
428
+ "OpenSearch-AI/Ops-MoA-Conan-embedding-v1",
429
+ "OpenSearch-AI/Ops-MoA-Yuan-embedding-1.0",
430
+ "Querit/Querit",
431
+ "facebook/hubert-base-ls960",
432
+ "facebook/hubert-large-ls960-ft",
433
+ "ByteDance-Seed/Seed1.5-Embedding",
434
+ "baseline/Human",
435
+ "bflhc/Octen-Embedding-0.6B",
436
+ "bflhc/Octen-Embedding-4B",
437
+ "bflhc/Octen-Embedding-8B",
438
+ "VPLabs/SearchMap_Preview",
439
+ "ByteDance/ListConRanker",
440
+ "Linq-AI-Research/Linq-Embed-Mistral",
441
+ "infly/inf-retriever-v1",
442
+ "infly/inf-retriever-v1-1.5b",
443
+ "OrdalieTech/Solon-embeddings-mini-beta-1.1",
444
+ "BAAI/bge-visualized-base",
445
+ "BAAI/bge-visualized-m3",
446
+ "laion/CLIP-ViT-B-16-DataComp.XL-s13B-b90K",
447
+ "laion/CLIP-ViT-B-32-DataComp.XL-s13B-b90K",
448
+ "laion/CLIP-ViT-B-32-laion2B-s34B-b79K",
449
+ "laion/CLIP-ViT-H-14-laion2B-s32B-b79K",
450
+ "laion/CLIP-ViT-L-14-DataComp.XL-s13B-b90K",
451
+ "laion/CLIP-ViT-L-14-laion2B-s32B-b82K",
452
+ "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k",
453
+ "laion/CLIP-ViT-g-14-laion2B-s34B-b88K",
454
+ "qihoo360/Zhinao-ChineseModernBert-Embedding",
455
+ "mteb/baseline-random-cross-encoder",
456
+ "mteb/baseline-random-encoder",
457
+ "nvidia/llama-nemoretriever-colembed-1b-v1",
458
+ "nvidia/llama-nemoretriever-colembed-3b-v1",
459
+ "nvidia/llama-nemotron-colembed-vl-3b-v2",
460
+ "nvidia/llama-nemotron-embed-vl-1b-v2",
461
+ "nvidia/nemotron-colembed-vl-4b-v2",
462
+ "nvidia/nemotron-colembed-vl-8b-v2",
463
+ "infgrad/Jasper-Token-Compression-600M",
464
+ "NovaSearch/jasper_en_vision_language_v1",
465
+ "KBLab/sentence-bert-swedish-cased",
466
+ "cl-nagoya/ruri-base",
467
+ "cl-nagoya/ruri-base-v2",
468
+ "cl-nagoya/ruri-large",
469
+ "cl-nagoya/ruri-large-v2",
470
+ "cl-nagoya/ruri-small",
471
+ "cl-nagoya/ruri-small-v2",
472
+ "cl-nagoya/ruri-v3-130m",
473
+ "cl-nagoya/ruri-v3-30m",
474
+ "cl-nagoya/ruri-v3-310m",
475
+ "cl-nagoya/ruri-v3-70m",
476
+ "Qwen/Qwen3-VL-Embedding-2B",
477
+ "Qwen/Qwen3-VL-Embedding-8B",
478
+ "Salesforce/blip2-opt-2.7b",
479
+ "Salesforce/blip2-opt-6.7b-coco",
480
+ "fyaronskiy/english_code_retriever",
481
+ "BMRetriever/BMRetriever-1B",
482
+ "BMRetriever/BMRetriever-2B",
483
+ "BMRetriever/BMRetriever-410M",
484
+ "BMRetriever/BMRetriever-7B",
485
+ "google/siglip-base-patch16-224",
486
+ "google/siglip-base-patch16-256",
487
+ "google/siglip-base-patch16-256-multilingual",
488
+ "google/siglip-base-patch16-384",
489
+ "google/siglip-base-patch16-512",
490
+ "google/siglip-large-patch16-256",
491
+ "google/siglip-large-patch16-384",
492
+ "google/siglip-so400m-patch14-224",
493
+ "google/siglip-so400m-patch14-384",
494
+ "google/siglip-so400m-patch16-256-i18n",
495
+ "sentence-transformers/all-MiniLM-L12-v2",
496
+ "sentence-transformers/all-MiniLM-L6-v2",
497
+ "sentence-transformers/all-mpnet-base-v2",
498
+ "facebook/contriever-msmarco",
499
+ "sentence-transformers/gtr-t5-base",
500
+ "sentence-transformers/gtr-t5-large",
501
+ "sentence-transformers/gtr-t5-xl",
502
+ "sentence-transformers/gtr-t5-xxl",
503
+ "sentence-transformers/LaBSE",
504
+ "keeeeenw/MicroLlama-text-embedding",
505
+ "sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
506
+ "sentence-transformers/multi-qa-mpnet-base-dot-v1",
507
+ "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
508
+ "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
509
+ "sentence-transformers/sentence-t5-base",
510
+ "sentence-transformers/sentence-t5-large",
511
+ "sentence-transformers/sentence-t5-xl",
512
+ "sentence-transformers/sentence-t5-xxl",
513
+ "sentence-transformers/static-retrieval-mrl-en-v1",
514
+ "sentence-transformers/static-similarity-mrl-multilingual-v1",
515
+ "llamaindex/vdr-2b-multi-v1",
516
+ "ICT-TIME-and-Querit/BOOM_4B_v1",
517
+ "royokong/e5-v",
518
+ "facebook/encodec_24khz",
519
+ "amazon/Titan-text-embeddings-v2",
520
+ "openai/text-embedding-3-large",
521
+ "openai/text-embedding-3-large (embed_dim=512)",
522
+ "openai/text-embedding-3-small",
523
+ "openai/text-embedding-3-small (embed_dim=512)",
524
+ "openai/text-embedding-ada-002",
525
+ "IEITYuan/Yuan-embedding-2.0-zh",
526
+ "sergeyzh/BERTA",
527
+ "deepvk/deberta-v1-base",
528
+ "DeepPavlov/distilrubert-small-cased-conversational",
529
+ "ai-forever/FRIDA",
530
+ "ai-sage/Giga-Embeddings-instruct",
531
+ "cointegrated/LaBSE-en-ru",
532
+ "sergeyzh/LaBSE-ru-turbo",
533
+ "ai-forever/ru-en-RoSBERTa",
534
+ "DeepPavlov/rubert-base-cased",
535
+ "DeepPavlov/rubert-base-cased-sentence",
536
+ "sergeyzh/rubert-mini-frida",
537
+ "cointegrated/rubert-tiny",
538
+ "cointegrated/rubert-tiny2",
539
+ "sergeyzh/rubert-tiny-turbo",
540
+ "ai-forever/sbert_large_mt_nlu_ru",
541
+ "ai-forever/sbert_large_nlu_ru",
542
+ "deepvk/USER2-base",
543
+ "deepvk/USER2-small",
544
+ "deepvk/USER-base",
545
+ "deepvk/USER-bge-m3",
546
+ "nvidia/NV-Embed-v1",
547
+ "nvidia/NV-Embed-v2",
548
+ "nvidia/llama-embed-nemotron-8b",
549
+ "nvidia/llama-nemotron-rerank-1b-v2",
550
+ "NbAiLab/nb-bert-base",
551
+ "NbAiLab/nb-bert-large",
552
+ "NbAiLab/nb-sbert-base",
553
+ "GritLM/GritLM-7B",
554
+ "GritLM/GritLM-8x7B",
555
+ "speechbrain/m-ctc-t-large",
556
+ "clips/e5-base-trm-nl",
557
+ "clips/e5-large-trm-nl",
558
+ "clips/e5-small-trm-nl",
559
+ "andersborges/model2vecdk",
560
+ "andersborges/model2vecdk-stem",
561
+ "MIT/ast-finetuned-audioset-10-10-0.4593",
562
+ "KFST/XLMRoberta-en-da-sv-nb",
563
+ "panalexeu/xlm-roberta-ua-distilled",
564
+ "nyu-visionx/moco-v3-vit-b",
565
+ "nyu-visionx/moco-v3-vit-l",
566
+ "Shuu12121/CodeSearch-ModernBERT-Crow-Plus",
567
+ "minishlab/M2V_base_glove",
568
+ "minishlab/M2V_base_glove_subword",
569
+ "minishlab/M2V_base_output",
570
+ "minishlab/M2V_multilingual_output",
571
+ "minishlab/potion-base-2M",
572
+ "minishlab/potion-base-32M",
573
+ "minishlab/potion-base-4M",
574
+ "minishlab/potion-base-8M",
575
+ "minishlab/potion-multilingual-128M",
576
+ "minishlab/potion-retrieval-32M",
577
+ "NeuML/pubmedbert-base-embeddings-100K",
578
+ "NeuML/pubmedbert-base-embeddings-1M",
579
+ "NeuML/pubmedbert-base-embeddings-2M",
580
+ "NeuML/pubmedbert-base-embeddings-500K",
581
+ "NeuML/pubmedbert-base-embeddings-8M",
582
+ "asapp/sew-d-base-plus-400k-ft-ls100h",
583
+ "asapp/sew-d-mid-400k-ft-ls100h",
584
+ "asapp/sew-d-tiny-100k-ft-ls100h",
585
+ "telepix/PIXIE-Rune-v1.0",
586
+ "VAGOsolutions/SauerkrautLM-ColLFM2-450M-v0.1",
587
+ "VAGOsolutions/SauerkrautLM-ColMinistral3-3b-v0.1",
588
+ "VAGOsolutions/SauerkrautLM-ColQwen3-1.7b-Turbo-v0.1",
589
+ "VAGOsolutions/SauerkrautLM-ColQwen3-2b-v0.1",
590
+ "VAGOsolutions/SauerkrautLM-ColQwen3-4b-v0.1",
591
+ "VAGOsolutions/SauerkrautLM-ColQwen3-8b-v0.1",
592
+ "ibm-granite/granite-embedding-107m-multilingual",
593
+ "ibm-granite/granite-embedding-125m-english",
594
+ "ibm-granite/granite-embedding-278m-multilingual",
595
+ "ibm-granite/granite-embedding-30m-english",
596
+ "ibm-granite/granite-embedding-english-r2",
597
+ "ibm-granite/granite-embedding-small-english-r2",
598
+ "geoffsee/auto-g-embed-st",
599
+ "nomic-ai/colnomic-embed-multimodal-3b",
600
+ "nomic-ai/colnomic-embed-multimodal-7b",
601
+ "vidore/colqwen2-v1.0",
602
+ "vidore/colqwen2.5-v0.2",
603
+ "TomoroAI/tomoro-colqwen3-embed-4b",
604
+ "athrael-soju/colqwen3.5-4.5B-v3",
605
+ "TomoroAI/tomoro-colqwen3-embed-8b",
606
+ "ApsaraStackMaaS/EvoQwen2.5-VL-Retriever-3B-v1",
607
+ "ApsaraStackMaaS/EvoQwen2.5-VL-Retriever-7B-v1",
608
+ "BeastyZ/e5-R-mistral-7b",
609
+ "intfloat/multilingual-e5-large-instruct",
610
+ "intfloat/e5-mistral-7b-instruct",
611
+ "zeta-alpha-ai/Zeta-Alpha-E5-Mistral",
612
+ "google/yamnet",
613
+ "codesage/codesage-base-v2",
614
+ "codesage/codesage-large-v2",
615
+ "codesage/codesage-small-v2",
616
+ "Tarka-AIR/Tarka-Embedding-150M-V1",
617
+ "Tarka-AIR/Tarka-Embedding-350M-V1",
618
+ "perplexity-ai/pplx-embed-v1-0.6b",
619
+ "perplexity-ai/pplx-embed-v1-4b",
620
+ "HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1",
621
+ "HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1.5",
622
+ "HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v2",
623
+ "HIT-TMG/KaLM-embedding-multilingual-mini-v1",
624
+ "KaLM-Embedding/KaLM-embedding-multilingual-mini-instruct-v2.5",
625
+ "tencent/KaLM-Embedding-Gemma3-12B-2511",
626
+ "Qwen/Qwen3-Embedding-0.6B",
627
+ "Qwen/Qwen3-Embedding-4B",
628
+ "Qwen/Qwen3-Embedding-8B",
629
+ "annamodels/LGAI-Embedding-Preview",
630
+ "opensearch-project/opensearch-neural-sparse-encoding-doc-v1",
631
+ "opensearch-project/opensearch-neural-sparse-encoding-doc-v2-distill",
632
+ "opensearch-project/opensearch-neural-sparse-encoding-doc-v2-mini",
633
+ "opensearch-project/opensearch-neural-sparse-encoding-doc-v3-distill",
634
+ "opensearch-project/opensearch-neural-sparse-encoding-doc-v3-gte",
635
+ "AITeamVN/Vietnamese_Embedding",
636
+ "bkai-foundation-models/vietnamese-bi-encoder",
637
+ "contextboxai/halong_embedding",
638
+ "GreenNode/GreenNode-Embedding-E5-Large-VN-V1",
639
+ "GreenNode/GreenNode-Embedding-KaLM-Mini-Instruct-VN-V1",
640
+ "GreenNode/GreenNode-Embedding-Large-VN-Mixed-V1",
641
+ "GreenNode/GreenNode-Embedding-Large-VN-V1",
642
+ "VoVanPhuc/sup-SimCSE-VietNamese-phobert-base",
643
+ "mteb/baseline-bb25",
644
+ "openai/whisper-base",
645
+ "openai/whisper-large-v3",
646
+ "openai/whisper-medium",
647
+ "openai/whisper-small",
648
+ "openai/whisper-tiny",
649
+ "microsoft/speecht5_asr",
650
+ "microsoft/speecht5_multimodal",
651
+ "microsoft/speecht5_tts",
652
+ "Mira190/Euler-Legal-Embedding-V1",
653
+ "MCINext/Hakim",
654
+ "MCINext/Hakim-small",
655
+ "MCINext/Hakim-unsup",
656
+ "TIGER-Lab/VLM2Vec-Full",
657
+ "TIGER-Lab/VLM2Vec-LoRA",
658
+ "richinfoai/ritrieve_zh_v1",
659
+ "vidore/colSmol-256M",
660
+ "vidore/colSmol-500M",
661
+ "facebook/mms-1b-all",
662
+ "facebook/mms-1b-fl102",
663
+ "facebook/mms-1b-l1107",
664
+ "facebook/data2vec-audio-base-960h",
665
+ "facebook/data2vec-audio-large-960h",
666
+ "ManiacLabs/miniac-embed",
667
+ "nomic-ai/nomic-embed-multimodal-3b",
668
+ "nomic-ai/nomic-embed-multimodal-7b",
669
+ "manveertamber/cadet-embed-base-v1",
670
+ "google/embeddinggemma-300m",
671
+ "google/gemini-embedding-001",
672
+ "google/text-embedding-004",
673
+ "google/text-embedding-005",
674
+ "google/text-multilingual-embedding-002",
675
+ "OpenMuQ/MuQ-MuLan-large",
676
+ "speechbrain/cnn14-esc50",
677
+ "McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-supervised",
678
+ "McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-unsup-simcse",
679
+ "McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-supervised",
680
+ "McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-unsup-simcse",
681
+ "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised",
682
+ "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-unsup-simcse",
683
+ "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-supervised",
684
+ "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-unsup-simcse",
685
+ "intfloat/mmE5-mllama-11b-instruct",
686
+ ]
687
+
688
+ # INSERT END