tomaarsen HF Staff committed on
Commit
1a9b74b
·
verified ·
1 Parent(s): 023fc8f

Add new CrossEncoder model

Browse files
1_Pooling/config.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "embedding_dimension": 384,
3
+ "pooling_mode": "cls",
4
+ "include_prompt": true
5
+ }
2_Dense/config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "in_features": 384,
3
+ "out_features": 384,
4
+ "bias": false,
5
+ "activation_function": "torch.nn.modules.activation.GELU",
6
+ "module_input_name": "sentence_embedding",
7
+ "module_output_name": "sentence_embedding"
8
+ }
2_Dense/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae4d8431b8ed8afeda16dd51608c4cb7e0f810724a91423dbbb867c047285000
3
+ size 589912
3_LayerNorm/config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "dimension": 384
3
+ }
3_LayerNorm/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e8b5ae0d2716b538d0cdd8664bef438f20a09a1b79654fc09f19560197d3274
3
+ size 3224
4_Dense/config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "in_features": 384,
3
+ "out_features": 1,
4
+ "bias": true,
5
+ "activation_function": "torch.nn.modules.linear.Identity",
6
+ "module_input_name": "sentence_embedding",
7
+ "module_output_name": "scores"
8
+ }
4_Dense/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03aa39279659756a52fb02cd569b97f2d1fb0d8036ab74d45c4736f358389e01
3
+ size 1684
README.md ADDED
@@ -0,0 +1,688 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ license: apache-2.0
5
+ tags:
6
+ - sentence-transformers
7
+ - cross-encoder
8
+ - reranker
9
+ - generated_from_trainer
10
+ - dataset_size:143393475
11
+ - loss:MSELoss
12
+ base_model: jhu-clsp/ettin-encoder-32m
13
+ pipeline_tag: text-ranking
14
+ library_name: sentence-transformers
15
+ metrics:
16
+ - map
17
+ - mrr@10
18
+ - ndcg@10
19
+ model-index:
20
+ - name: Production Stage 1 (32m, MSE distill from mxbai-rerank-large-v2)
21
+ results:
22
+ - task:
23
+ type: cross-encoder-reranking
24
+ name: Cross Encoder Reranking
25
+ dataset:
26
+ name: NanoMSMARCO R100
27
+ type: NanoMSMARCO_R100
28
+ metrics:
29
+ - type: map
30
+ value: 0.6366
31
+ name: Map
32
+ - type: mrr@10
33
+ value: 0.6537
34
+ name: Mrr@10
35
+ - type: ndcg@10
36
+ value: 0.7111
37
+ name: Ndcg@10
38
+ - task:
39
+ type: cross-encoder-reranking
40
+ name: Cross Encoder Reranking
41
+ dataset:
42
+ name: NanoNFCorpus R100
43
+ type: NanoNFCorpus_R100
44
+ metrics:
45
+ - type: map
46
+ value: 0.3534
47
+ name: Map
48
+ - type: mrr@10
49
+ value: 0.5453
50
+ name: Mrr@10
51
+ - type: ndcg@10
52
+ value: 0.3777
53
+ name: Ndcg@10
54
+ - task:
55
+ type: cross-encoder-reranking
56
+ name: Cross Encoder Reranking
57
+ dataset:
58
+ name: NanoNQ R100
59
+ type: NanoNQ_R100
60
+ metrics:
61
+ - type: map
62
+ value: 0.7338
63
+ name: Map
64
+ - type: mrr@10
65
+ value: 0.7672
66
+ name: Mrr@10
67
+ - type: ndcg@10
68
+ value: 0.7873
69
+ name: Ndcg@10
70
+ - task:
71
+ type: cross-encoder-reranking
72
+ name: Cross Encoder Reranking
73
+ dataset:
74
+ name: NanoFiQA2018 R100
75
+ type: NanoFiQA2018_R100
76
+ metrics:
77
+ - type: map
78
+ value: 0.4774
79
+ name: Map
80
+ - type: mrr@10
81
+ value: 0.604
82
+ name: Mrr@10
83
+ - type: ndcg@10
84
+ value: 0.5401
85
+ name: Ndcg@10
86
+ - task:
87
+ type: cross-encoder-reranking
88
+ name: Cross Encoder Reranking
89
+ dataset:
90
+ name: NanoTouche2020 R100
91
+ type: NanoTouche2020_R100
92
+ metrics:
93
+ - type: map
94
+ value: 0.4899
95
+ name: Map
96
+ - type: mrr@10
97
+ value: 0.7815
98
+ name: Mrr@10
99
+ - type: ndcg@10
100
+ value: 0.5663
101
+ name: Ndcg@10
102
+ - task:
103
+ type: cross-encoder-reranking
104
+ name: Cross Encoder Reranking
105
+ dataset:
106
+ name: NanoSciFact R100
107
+ type: NanoSciFact_R100
108
+ metrics:
109
+ - type: map
110
+ value: 0.7029
111
+ name: Map
112
+ - type: mrr@10
113
+ value: 0.7147
114
+ name: Mrr@10
115
+ - type: ndcg@10
116
+ value: 0.7433
117
+ name: Ndcg@10
118
+ - task:
119
+ type: cross-encoder-reranking
120
+ name: Cross Encoder Reranking
121
+ dataset:
122
+ name: NanoHotpotQA R100
123
+ type: NanoHotpotQA_R100
124
+ metrics:
125
+ - type: map
126
+ value: 0.9193
127
+ name: Map
128
+ - type: mrr@10
129
+ value: 0.98
130
+ name: Mrr@10
131
+ - type: ndcg@10
132
+ value: 0.9501
133
+ name: Ndcg@10
134
+ - task:
135
+ type: cross-encoder-reranking
136
+ name: Cross Encoder Reranking
137
+ dataset:
138
+ name: NanoArguAna R100
139
+ type: NanoArguAna_R100
140
+ metrics:
141
+ - type: map
142
+ value: 0.5671
143
+ name: Map
144
+ - type: mrr@10
145
+ value: 0.5932
146
+ name: Mrr@10
147
+ - type: ndcg@10
148
+ value: 0.6787
149
+ name: Ndcg@10
150
+ - task:
151
+ type: cross-encoder-reranking
152
+ name: Cross Encoder Reranking
153
+ dataset:
154
+ name: NanoFEVER R100
155
+ type: NanoFEVER_R100
156
+ metrics:
157
+ - type: map
158
+ value: 0.9325
159
+ name: Map
160
+ - type: mrr@10
161
+ value: 0.9567
162
+ name: Mrr@10
163
+ - type: ndcg@10
164
+ value: 0.9512
165
+ name: Ndcg@10
166
+ - task:
167
+ type: cross-encoder-reranking
168
+ name: Cross Encoder Reranking
169
+ dataset:
170
+ name: NanoDBPedia R100
171
+ type: NanoDBPedia_R100
172
+ metrics:
173
+ - type: map
174
+ value: 0.6413
175
+ name: Map
176
+ - type: mrr@10
177
+ value: 0.8847
178
+ name: Mrr@10
179
+ - type: ndcg@10
180
+ value: 0.7178
181
+ name: Ndcg@10
182
+ - task:
183
+ type: cross-encoder-reranking
184
+ name: Cross Encoder Reranking
185
+ dataset:
186
+ name: NanoClimateFEVER R100
187
+ type: NanoClimateFEVER_R100
188
+ metrics:
189
+ - type: map
190
+ value: 0.4464
191
+ name: Map
192
+ - type: mrr@10
193
+ value: 0.696
194
+ name: Mrr@10
195
+ - type: ndcg@10
196
+ value: 0.5251
197
+ name: Ndcg@10
198
+ - task:
199
+ type: cross-encoder-reranking
200
+ name: Cross Encoder Reranking
201
+ dataset:
202
+ name: NanoSCIDOCS R100
203
+ type: NanoSCIDOCS_R100
204
+ metrics:
205
+ - type: map
206
+ value: 0.2919
207
+ name: Map
208
+ - type: mrr@10
209
+ value: 0.5483
210
+ name: Mrr@10
211
+ - type: ndcg@10
212
+ value: 0.3569
213
+ name: Ndcg@10
214
+ - task:
215
+ type: cross-encoder-reranking
216
+ name: Cross Encoder Reranking
217
+ dataset:
218
+ name: NanoQuoraRetrieval R100
219
+ type: NanoQuoraRetrieval_R100
220
+ metrics:
221
+ - type: map
222
+ value: 0.9297
223
+ name: Map
224
+ - type: mrr@10
225
+ value: 0.9617
226
+ name: Mrr@10
227
+ - type: ndcg@10
228
+ value: 0.9538
229
+ name: Ndcg@10
230
+ - task:
231
+ type: cross-encoder-nano-beir
232
+ name: Cross Encoder Nano BEIR
233
+ dataset:
234
+ name: NanoBEIR R100 mean
235
+ type: NanoBEIR_R100_mean
236
+ metrics:
237
+ - type: map
238
+ value: 0.6248
239
+ name: Map
240
+ - type: mrr@10
241
+ value: 0.7452
242
+ name: Mrr@10
243
+ - type: ndcg@10
244
+ value: 0.6815
245
+ name: Ndcg@10
246
+ ---
247
+
248
+ # Production Stage 1 (32m, MSE distill from mxbai-rerank-large-v2)
249
+
250
+ This is a [Cross Encoder](https://www.sbert.net/docs/cross_encoder/usage/usage.html) model finetuned from [jhu-clsp/ettin-encoder-32m](https://huggingface.co/jhu-clsp/ettin-encoder-32m) using the [sentence-transformers](https://www.SBERT.net) library. It computes scores for pairs of texts, which can be used for text reranking and semantic search.
251
+
252
+ ## Model Details
253
+
254
+ ### Model Description
255
+ - **Model Type:** Cross Encoder
256
+ - **Base model:** [jhu-clsp/ettin-encoder-32m](https://huggingface.co/jhu-clsp/ettin-encoder-32m) <!-- at revision 1b8ba06455dd44f80fc9c1ca9e22806157a57379 -->
257
+ - **Maximum Sequence Length:** 7999 tokens
258
+ - **Number of Output Labels:** 1 label
259
+ - **Supported Modality:** Text
260
+ <!-- - **Training Dataset:** Unknown -->
261
+ - **Language:** en
262
+ - **License:** apache-2.0
263
+
264
+ ### Model Sources
265
+
266
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
267
+ - **Documentation:** [Cross Encoder Documentation](https://www.sbert.net/docs/cross_encoder/usage/usage.html)
268
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/huggingface/sentence-transformers)
269
+ - **Hugging Face:** [Cross Encoders on Hugging Face](https://huggingface.co/models?library=sentence-transformers&other=cross-encoder)
270
+
271
+ ### Full Model Architecture
272
+
273
+ ```
274
+ CrossEncoder(
275
+ (0): Transformer({'transformer_task': 'feature-extraction', 'modality_config': {'text': {'method': 'forward', 'method_output_name': 'last_hidden_state'}}, 'module_output_name': 'token_embeddings', 'architecture': 'ModernBertModel'})
276
+ (1): Pooling({'embedding_dimension': 384, 'pooling_mode': 'cls', 'include_prompt': True})
277
+ (2): Dense({'in_features': 384, 'out_features': 384, 'bias': False, 'activation_function': 'torch.nn.modules.activation.GELU', 'module_input_name': 'sentence_embedding', 'module_output_name': 'sentence_embedding'})
278
+ (3): LayerNorm({'dimension': 384})
279
+ (4): Dense({'in_features': 384, 'out_features': 1, 'bias': True, 'activation_function': 'torch.nn.modules.linear.Identity', 'module_input_name': 'sentence_embedding', 'module_output_name': 'scores'})
280
+ )
281
+ ```
282
+
283
+ ## Usage
284
+
285
+ ### Direct Usage (Sentence Transformers)
286
+
287
+ First install the Sentence Transformers library:
288
+
289
+ ```bash
290
+ pip install -U sentence-transformers
291
+ ```
292
+
293
+ Then you can load this model and run inference.
294
+ ```python
295
+ from sentence_transformers import CrossEncoder
296
+
297
+ # Download from the 🤗 Hub
298
+ model = CrossEncoder("cross-encoder/ettin-reranker-32m-v1")
299
+ # Get scores for pairs of inputs
300
+ pairs = [
301
+ ['Why do we need binomial distribution?', 'Why is the binomial distribution important?'],
302
+ ['I already have Windows 10, can I delete Windows.old?', 'After resetting windows 10, can I safely delete the "old windows" folder?'],
303
+ ['How can guys last longer during sex?', 'How do men last longer in bed?'],
304
+ ['I feel depressed all the time. What do I do?', 'I feel depressed all the time, what should I do?'],
305
+ ['How is Gal Gadot as a woman and person?', 'How is Gal Gadot as a woman?'],
306
+ ]
307
+ scores = model.predict(pairs)
308
+ print(scores)
309
+ # [12.4375 11.4375 11.125 15.125 14.0625]
310
+
311
+ # Or rank different texts based on similarity to a single text
312
+ ranks = model.rank(
313
+ 'Why do we need binomial distribution?',
314
+ [
315
+ 'Why is the binomial distribution important?',
316
+ 'After resetting windows 10, can I safely delete the "old windows" folder?',
317
+ 'How do men last longer in bed?',
318
+ 'I feel depressed all the time, what should I do?',
319
+ 'How is Gal Gadot as a woman?',
320
+ ]
321
+ )
322
+ # [{'corpus_id': ..., 'score': ...}, {'corpus_id': ..., 'score': ...}, ...]
323
+ ```
324
+
325
+ <!--
326
+ ### Direct Usage (Transformers)
327
+
328
+ <details><summary>Click to see the direct usage in Transformers</summary>
329
+
330
+ </details>
331
+ -->
332
+
333
+ <!--
334
+ ### Downstream Usage (Sentence Transformers)
335
+
336
+ You can finetune this model on your own dataset.
337
+
338
+ <details><summary>Click to expand</summary>
339
+
340
+ </details>
341
+ -->
342
+
343
+ <!--
344
+ ### Out-of-Scope Use
345
+
346
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
347
+ -->
348
+
349
+ ## Evaluation
350
+
351
+ ### Metrics
352
+
353
+ #### Cross Encoder Reranking
354
+
355
+ * Datasets: `NanoMSMARCO_R100`, `NanoNFCorpus_R100`, `NanoNQ_R100`, `NanoFiQA2018_R100`, `NanoTouche2020_R100`, `NanoSciFact_R100`, `NanoHotpotQA_R100`, `NanoArguAna_R100`, `NanoFEVER_R100`, `NanoDBPedia_R100`, `NanoClimateFEVER_R100`, `NanoSCIDOCS_R100` and `NanoQuoraRetrieval_R100`
356
+ * Evaluated with [<code>CrossEncoderRerankingEvaluator</code>](https://sbert.net/docs/package_reference/cross_encoder/evaluation.html#sentence_transformers.cross_encoder.evaluation.CrossEncoderRerankingEvaluator) with these parameters:
357
+ ```json
358
+ {
359
+ "at_k": 10,
360
+ "always_rerank_positives": true
361
+ }
362
+ ```
363
+
364
+ | Metric | NanoMSMARCO_R100 | NanoNFCorpus_R100 | NanoNQ_R100 | NanoFiQA2018_R100 | NanoTouche2020_R100 | NanoSciFact_R100 | NanoHotpotQA_R100 | NanoArguAna_R100 | NanoFEVER_R100 | NanoDBPedia_R100 | NanoClimateFEVER_R100 | NanoSCIDOCS_R100 | NanoQuoraRetrieval_R100 |
365
+ |:------------|:---------------------|:---------------------|:---------------------|:---------------------|:---------------------|:---------------------|:---------------------|:---------------------|:---------------------|:---------------------|:----------------------|:---------------------|:------------------------|
366
+ | map | 0.6366 (+0.1470) | 0.3534 (+0.0924) | 0.7338 (+0.3142) | 0.4774 (+0.1124) | 0.4899 (-0.0600) | 0.7029 (+0.0332) | 0.9193 (+0.1510) | 0.5671 (+0.1564) | 0.9325 (+0.1606) | 0.6413 (+0.1295) | 0.4464 (+0.2061) | 0.2919 (+0.0176) | 0.9297 (+0.0988) |
367
+ | mrr@10 | 0.6537 (+0.1762) | 0.5453 (+0.0454) | 0.7672 (+0.3406) | 0.6040 (+0.1132) | 0.7815 (-0.1257) | 0.7147 (+0.0366) | 0.9800 (+0.0571) | 0.5932 (+0.2001) | 0.9567 (+0.1766) | 0.8847 (+0.0840) | 0.6960 (+0.2922) | 0.5483 (-0.0112) | 0.9617 (+0.0935) |
368
+ | **ndcg@10** | **0.7111 (+0.1707)** | **0.3777 (+0.0526)** | **0.7873 (+0.2866)** | **0.5401 (+0.1027)** | **0.5663 (-0.1275)** | **0.7433 (+0.0334)** | **0.9501 (+0.1224)** | **0.6787 (+0.1898)** | **0.9512 (+0.1418)** | **0.7178 (+0.1034)** | **0.5251 (+0.2074)** | **0.3569 (+0.0218)** | **0.9538 (+0.0852)** |
369
+
370
+ #### Cross Encoder Nano BEIR
371
+
372
+ * Dataset: `NanoBEIR_R100_mean`
373
+ * Evaluated with [<code>CrossEncoderNanoBEIREvaluator</code>](https://sbert.net/docs/package_reference/cross_encoder/evaluation.html#sentence_transformers.cross_encoder.evaluation.CrossEncoderNanoBEIREvaluator) with these parameters:
374
+ ```json
375
+ {
376
+ "dataset_names": [
377
+ "msmarco",
378
+ "nfcorpus",
379
+ "nq",
380
+ "fiqa2018",
381
+ "touche2020",
382
+ "scifact",
383
+ "hotpotqa",
384
+ "arguana",
385
+ "fever",
386
+ "dbpedia",
387
+ "climatefever",
388
+ "scidocs",
389
+ "quoraretrieval"
390
+ ],
391
+ "dataset_id": "sentence-transformers/NanoBEIR-en",
392
+ "rerank_k": 100,
393
+ "at_k": 10,
394
+ "always_rerank_positives": true
395
+ }
396
+ ```
397
+
398
+ | Metric | Value |
399
+ |:------------|:---------------------|
400
+ | map | 0.6248 (+0.1199) |
401
+ | mrr@10 | 0.7452 (+0.1137) |
402
+ | **ndcg@10** | **0.6815 (+0.1069)** |
403
+
404
+ <!--
405
+ ## Bias, Risks and Limitations
406
+
407
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
408
+ -->
409
+
410
+ <!--
411
+ ### Recommendations
412
+
413
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
414
+ -->
415
+
416
+ ## Training Details
417
+
418
+ ### Training Dataset
419
+
420
+ #### Unnamed Dataset
421
+
422
+ * Size: 143,393,475 training samples
423
+ * Columns: <code>query</code>, <code>document</code>, and <code>label</code>
424
+ * Approximate statistics based on the first 1000 samples:
425
+ | | query | document | label |
426
+ |:--------|:------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------|
427
+ | type | string | string | float |
428
+ | details | <ul><li>min: 26 characters</li><li>mean: 55.52 characters</li><li>max: 249 characters</li></ul> | <ul><li>min: 63 characters</li><li>mean: 659.91 characters</li><li>max: 3975 characters</li></ul> | <ul><li>min: -2.94</li><li>mean: 8.51</li><li>max: 13.88</li></ul> |
429
+ * Samples:
430
+ | query | document | label |
431
+ |:----------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------|
432
+ | <code>Help me with my Reborn performance</code> | <code>I was reading the comment section for Dotacinema's world of dota video, and a bunch of people were complaining how there were a lot of bugs and some talked about PERFORMANCE ISSUES. But there were also people saying that reborn has actually IMPROVED their gameplay?
433
+
434
+
435
+ I am one of those people who is running into performance issues and would desperately like to know how some are getting BETTER performance while others like me are getting worse. I'm not complaining about bugs, I'm complaing about framerate, I use to get 60 fps solid in source 1 but I now have 40 or at worst 30 fps in source 2.
436
+ I have an i3 processor/gtx560ti/16gb RAM
437
+
438
+ i dont think it's a potato pc, so I dont know what's happening, I cleaned my computer recently so dust isnt affecting anything in anyway.
439
+ So if you gained or had IMPROVED performance in source 2 please list the settings you are enabling, so I can see where I am at fault. (v sync is off btw)
440
+
441
+ TLDR: Have bad performance now from source 2, if you have good p...</code> | <code>9.5</code> |
442
+ | <code>Really wanna try out the game and expansion, ~$60 is hefty. Likelihood of sales?</code> | <code>As per title, steam sells the game and its expansions for $60 total. Heavy price to drop. Are there sales on any other website? This game looks fantastic to immerse in otherwise and I'm pleased that this subreddit has at least some attention to help out new folks!</code> | <code>9.25</code> |
443
+ | <code>Your Avatar. [MGSV Spoilers]</code> | <code>Was anyone else suprised he actually replaces the snake model in some cutscenes. I've only tried the first Quiet cutscenes, i was just amazed I haven't seen anybody else say this yet.<br>Sorry if repost.</code> | <code>5.25</code> |
444
+ * Loss: [<code>MSELoss</code>](https://sbert.net/docs/package_reference/cross_encoder/losses.html#mseloss) with these parameters:
445
+ ```json
446
+ {
447
+ "activation_fn": "torch.nn.modules.linear.Identity"
448
+ }
449
+ ```
450
+
451
+ ### Evaluation Dataset
452
+
453
+ #### Unnamed Dataset
454
+
455
+ * Size: 5,000 evaluation samples
456
+ * Columns: <code>query</code>, <code>document</code>, and <code>label</code>
457
+ * Approximate statistics based on the first 1000 samples:
458
+ | | query | document | label |
459
+ |:--------|:------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------|
460
+ | type | string | string | float |
461
+ | details | <ul><li>min: 14 characters</li><li>mean: 52.62 characters</li><li>max: 168 characters</li></ul> | <ul><li>min: 11 characters</li><li>mean: 50.12 characters</li><li>max: 184 characters</li></ul> | <ul><li>min: 4.44</li><li>mean: 13.49</li><li>max: 18.62</li></ul> |
462
+ * Samples:
463
+ | query | document | label |
464
+ |:------------------------------------------------------------------|:---------------------------------------------------------------------------------------|:---------------------|
465
+ | <code>Why do we need binomial distribution?</code> | <code>Why is the binomial distribution important?</code> | <code>11.375</code> |
466
+ | <code>I already have Windows 10, can I delete Windows.old?</code> | <code>After resetting windows 10, can I safely delete the "old windows" folder?</code> | <code>10.875</code> |
467
+ | <code>How can guys last longer during sex?</code> | <code>How do men last longer in bed?</code> | <code>10.8125</code> |
468
+ * Loss: [<code>MSELoss</code>](https://sbert.net/docs/package_reference/cross_encoder/losses.html#mseloss) with these parameters:
469
+ ```json
470
+ {
471
+ "activation_fn": "torch.nn.modules.linear.Identity"
472
+ }
473
+ ```
474
+
475
+ ### Training Hyperparameters
476
+ #### Non-Default Hyperparameters
477
+
478
+ - `per_device_train_batch_size`: 64
479
+ - `num_train_epochs`: 1
480
+ - `learning_rate`: 0.00012
481
+ - `warmup_steps`: 0.03
482
+ - `bf16`: True
483
+ - `per_device_eval_batch_size`: 64
484
+ - `load_best_model_at_end`: True
485
+ - `seed`: 12
486
+ - `dataloader_num_workers`: 4
487
+
488
+ #### All Hyperparameters
489
+ <details><summary>Click to expand</summary>
490
+
491
+ - `per_device_train_batch_size`: 64
492
+ - `num_train_epochs`: 1
493
+ - `max_steps`: -1
494
+ - `learning_rate`: 0.00012
495
+ - `lr_scheduler_type`: linear
496
+ - `lr_scheduler_kwargs`: None
497
+ - `warmup_steps`: 0.03
498
+ - `optim`: adamw_torch
499
+ - `optim_args`: None
500
+ - `weight_decay`: 0.0
501
+ - `adam_beta1`: 0.9
502
+ - `adam_beta2`: 0.999
503
+ - `adam_epsilon`: 1e-08
504
+ - `optim_target_modules`: None
505
+ - `gradient_accumulation_steps`: 1
506
+ - `average_tokens_across_devices`: True
507
+ - `max_grad_norm`: 1.0
508
+ - `label_smoothing_factor`: 0.0
509
+ - `bf16`: True
510
+ - `fp16`: False
511
+ - `bf16_full_eval`: False
512
+ - `fp16_full_eval`: False
513
+ - `tf32`: None
514
+ - `gradient_checkpointing`: False
515
+ - `gradient_checkpointing_kwargs`: None
516
+ - `torch_compile`: False
517
+ - `torch_compile_backend`: None
518
+ - `torch_compile_mode`: None
519
+ - `use_liger_kernel`: False
520
+ - `liger_kernel_config`: None
521
+ - `use_cache`: False
522
+ - `neftune_noise_alpha`: None
523
+ - `torch_empty_cache_steps`: None
524
+ - `auto_find_batch_size`: False
525
+ - `log_on_each_node`: True
526
+ - `logging_nan_inf_filter`: True
527
+ - `include_num_input_tokens_seen`: no
528
+ - `log_level`: passive
529
+ - `log_level_replica`: warning
530
+ - `disable_tqdm`: False
531
+ - `project`: huggingface
532
+ - `trackio_space_id`: None
533
+ - `trackio_bucket_id`: None
534
+ - `trackio_static_space_id`: None
535
+ - `per_device_eval_batch_size`: 64
536
+ - `prediction_loss_only`: True
537
+ - `eval_on_start`: False
538
+ - `eval_do_concat_batches`: True
539
+ - `eval_use_gather_object`: False
540
+ - `eval_accumulation_steps`: None
541
+ - `include_for_metrics`: []
542
+ - `batch_eval_metrics`: False
543
+ - `save_only_model`: False
544
+ - `save_on_each_node`: False
545
+ - `enable_jit_checkpoint`: False
546
+ - `push_to_hub`: False
547
+ - `hub_private_repo`: None
548
+ - `hub_model_id`: None
549
+ - `hub_strategy`: every_save
550
+ - `hub_always_push`: False
551
+ - `hub_revision`: None
552
+ - `load_best_model_at_end`: True
553
+ - `ignore_data_skip`: False
554
+ - `restore_callback_states_from_checkpoint`: False
555
+ - `full_determinism`: False
556
+ - `seed`: 12
557
+ - `data_seed`: None
558
+ - `use_cpu`: False
559
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
560
+ - `parallelism_config`: None
561
+ - `dataloader_drop_last`: True
562
+ - `dataloader_num_workers`: 4
563
+ - `dataloader_pin_memory`: True
564
+ - `dataloader_persistent_workers`: False
565
+ - `dataloader_prefetch_factor`: None
566
+ - `remove_unused_columns`: True
567
+ - `label_names`: None
568
+ - `train_sampling_strategy`: random
569
+ - `length_column_name`: length
570
+ - `ddp_find_unused_parameters`: None
571
+ - `ddp_bucket_cap_mb`: None
572
+ - `ddp_broadcast_buffers`: False
573
+ - `ddp_static_graph`: None
574
+ - `ddp_backend`: None
575
+ - `ddp_timeout`: 1800
576
+ - `fsdp`: []
577
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
578
+ - `deepspeed`: None
579
+ - `debug`: []
580
+ - `skip_memory_metrics`: True
581
+ - `do_predict`: False
582
+ - `resume_from_checkpoint`: None
583
+ - `warmup_ratio`: None
584
+ - `local_rank`: -1
585
+ - `prompts`: None
586
+ - `batch_sampler`: batch_sampler
587
+ - `multi_dataset_batch_sampler`: proportional
588
+ - `router_mapping`: {}
589
+ - `learning_rate_mapping`: {}
590
+
591
+ </details>
592
+
593
+ ### Training Logs
594
+ | Epoch | Step | Training Loss | Validation Loss | NanoMSMARCO_R100_ndcg@10 | NanoNFCorpus_R100_ndcg@10 | NanoNQ_R100_ndcg@10 | NanoFiQA2018_R100_ndcg@10 | NanoTouche2020_R100_ndcg@10 | NanoSciFact_R100_ndcg@10 | NanoHotpotQA_R100_ndcg@10 | NanoArguAna_R100_ndcg@10 | NanoFEVER_R100_ndcg@10 | NanoDBPedia_R100_ndcg@10 | NanoClimateFEVER_R100_ndcg@10 | NanoSCIDOCS_R100_ndcg@10 | NanoQuoraRetrieval_R100_ndcg@10 | NanoBEIR_R100_mean_ndcg@10 |
595
+ |:----------:|:----------:|:-------------:|:---------------:|:------------------------:|:-------------------------:|:--------------------:|:-------------------------:|:---------------------------:|:------------------------:|:-------------------------:|:------------------------:|:----------------------:|:------------------------:|:-----------------------------:|:------------------------:|:-------------------------------:|:--------------------------:|
596
+ | -1 | -1 | - | - | 0.0320 (-0.5085) | 0.2565 (-0.0686) | 0.0418 (-0.4588) | 0.0438 (-0.3936) | 0.1218 (-0.5720) | 0.0425 (-0.6674) | 0.1154 (-0.7123) | 0.0673 (-0.4215) | 0.0519 (-0.7575) | 0.1926 (-0.4217) | 0.0770 (-0.2408) | 0.0483 (-0.2868) | 0.0550 (-0.8137) | 0.0881 (-0.4864) |
597
+ | 0.0000 | 1 | 73.0701 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
598
+ | 0.0250 | 7002 | 3.8772 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
599
+ | 0.0500 | 14004 | 1.6962 | 1.3073 | 0.6372 (+0.0968) | 0.3971 (+0.0721) | 0.6898 (+0.1892) | 0.4969 (+0.0595) | 0.5659 (-0.1279) | 0.7500 (+0.0401) | 0.9324 (+0.1047) | 0.6589 (+0.1701) | 0.9412 (+0.1318) | 0.6867 (+0.0723) | 0.4683 (+0.1506) | 0.3544 (+0.0192) | 0.9462 (+0.0775) | 0.6558 (+0.0812) |
600
+ | 0.0750 | 21006 | 1.5204 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
601
+ | 0.1000 | 28008 | 1.4272 | 1.1940 | 0.6842 (+0.1438) | 0.4023 (+0.0773) | 0.7159 (+0.2152) | 0.5034 (+0.0659) | 0.5782 (-0.1156) | 0.7249 (+0.0150) | 0.9338 (+0.1061) | 0.6179 (+0.1291) | 0.9208 (+0.1114) | 0.6951 (+0.0808) | 0.5051 (+0.1873) | 0.3628 (+0.0277) | 0.9451 (+0.0764) | 0.6607 (+0.0862) |
602
+ | 0.1250 | 35010 | 1.3634 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
603
+ | 0.1500 | 42012 | 1.3163 | 1.0090 | 0.6605 (+0.1201) | 0.4069 (+0.0819) | 0.7260 (+0.2254) | 0.5327 (+0.0952) | 0.5682 (-0.1256) | 0.7211 (+0.0112) | 0.9302 (+0.1025) | 0.6496 (+0.1608) | 0.9053 (+0.0959) | 0.7033 (+0.0890) | 0.5055 (+0.1877) | 0.4028 (+0.0677) | 0.9455 (+0.0768) | 0.6660 (+0.0914) |
604
+ | 0.1750 | 49014 | 1.2788 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
605
+ | 0.2000 | 56016 | 1.2434 | 0.8689 | 0.6827 (+0.1422) | 0.4053 (+0.0803) | 0.7653 (+0.2647) | 0.4848 (+0.0473) | 0.5759 (-0.1179) | 0.7387 (+0.0288) | 0.9464 (+0.1187) | 0.6446 (+0.1558) | 0.9160 (+0.1066) | 0.7077 (+0.0934) | 0.5025 (+0.1848) | 0.3707 (+0.0356) | 0.9464 (+0.0777) | 0.6682 (+0.0937) |
606
+ | 0.2250 | 63018 | 1.2135 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
607
+ | 0.2500 | 70020 | 1.1868 | 0.8525 | 0.6714 (+0.1310) | 0.4041 (+0.0791) | 0.7533 (+0.2526) | 0.5034 (+0.0660) | 0.5982 (-0.0956) | 0.7458 (+0.0358) | 0.9383 (+0.1106) | 0.6642 (+0.1753) | 0.9295 (+0.1201) | 0.7090 (+0.0947) | 0.5150 (+0.1973) | 0.3674 (+0.0323) | 0.9483 (+0.0796) | 0.6729 (+0.0984) |
608
+ | 0.2750 | 77022 | 1.1614 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
609
+ | 0.3000 | 84024 | 1.1426 | 0.8028 | 0.6649 (+0.1245) | 0.4001 (+0.0751) | 0.7688 (+0.2682) | 0.5109 (+0.0735) | 0.5692 (-0.1246) | 0.7681 (+0.0582) | 0.9433 (+0.1156) | 0.7080 (+0.2191) | 0.9277 (+0.1183) | 0.6873 (+0.0730) | 0.5279 (+0.2102) | 0.3766 (+0.0415) | 0.9583 (+0.0896) | 0.6778 (+0.1032) |
610
+ | 0.3250 | 91026 | 1.1219 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
611
+ | 0.3500 | 98028 | 1.1001 | 0.7750 | 0.6879 (+0.1474) | 0.3860 (+0.0610) | 0.7553 (+0.2546) | 0.5386 (+0.1012) | 0.5828 (-0.1110) | 0.7089 (-0.0010) | 0.9363 (+0.1086) | 0.6587 (+0.1699) | 0.9346 (+0.1252) | 0.6903 (+0.0760) | 0.5269 (+0.2092) | 0.3950 (+0.0599) | 0.9485 (+0.0798) | 0.6731 (+0.0985) |
612
+ | 0.3750 | 105030 | 1.0853 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
613
+ | 0.4000 | 112032 | 1.0658 | 0.7609 | 0.6882 (+0.1478) | 0.4008 (+0.0757) | 0.7679 (+0.2673) | 0.5054 (+0.0680) | 0.5749 (-0.1189) | 0.7378 (+0.0279) | 0.9460 (+0.1182) | 0.6375 (+0.1486) | 0.9375 (+0.1281) | 0.6996 (+0.0852) | 0.5160 (+0.1983) | 0.3653 (+0.0302) | 0.9561 (+0.0874) | 0.6718 (+0.0972) |
614
+ | 0.4250 | 119034 | 1.0506 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
615
+ | 0.4500 | 126036 | 1.0362 | 0.8689 | 0.7079 (+0.1675) | 0.4064 (+0.0813) | 0.7693 (+0.2686) | 0.5118 (+0.0744) | 0.5916 (-0.1022) | 0.7200 (+0.0101) | 0.9439 (+0.1162) | 0.6686 (+0.1798) | 0.9202 (+0.1108) | 0.7056 (+0.0913) | 0.5211 (+0.2034) | 0.3740 (+0.0389) | 0.9611 (+0.0924) | 0.6770 (+0.1025) |
616
+ | 0.4750 | 133038 | 1.0205 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
617
+ | 0.5000 | 140040 | 1.0067 | 0.7718 | 0.6867 (+0.1463) | 0.3975 (+0.0725) | 0.7751 (+0.2745) | 0.4923 (+0.0548) | 0.5812 (-0.1126) | 0.7203 (+0.0104) | 0.9445 (+0.1168) | 0.6774 (+0.1886) | 0.9376 (+0.1282) | 0.6991 (+0.0847) | 0.5132 (+0.1954) | 0.3640 (+0.0289) | 0.9527 (+0.0841) | 0.6724 (+0.0979) |
618
+ | 0.5250 | 147042 | 0.9960 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
619
+ | 0.5500 | 154044 | 0.9820 | 0.7580 | 0.7104 (+0.1700) | 0.4012 (+0.0761) | 0.7787 (+0.2781) | 0.5099 (+0.0725) | 0.5716 (-0.1223) | 0.7429 (+0.0330) | 0.9416 (+0.1139) | 0.6791 (+0.1903) | 0.9490 (+0.1396) | 0.7085 (+0.0942) | 0.5066 (+0.1888) | 0.3756 (+0.0404) | 0.9569 (+0.0883) | 0.6794 (+0.1048) |
620
+ | 0.5750 | 161046 | 0.9715 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
621
+ | 0.6000 | 168048 | 0.9597 | 0.6707 | 0.6819 (+0.1415) | 0.3924 (+0.0674) | 0.7767 (+0.2760) | 0.5066 (+0.0692) | 0.5618 (-0.1320) | 0.7440 (+0.0341) | 0.9462 (+0.1185) | 0.6393 (+0.1505) | 0.9293 (+0.1199) | 0.7106 (+0.0963) | 0.5128 (+0.1951) | 0.3777 (+0.0426) | 0.9521 (+0.0834) | 0.6717 (+0.0971) |
622
+ | 0.6250 | 175050 | 0.9477 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
623
+ | 0.6500 | 182052 | 0.9395 | 0.7027 | 0.6909 (+0.1505) | 0.3866 (+0.0616) | 0.8037 (+0.3031) | 0.4867 (+0.0493) | 0.5710 (-0.1228) | 0.7571 (+0.0472) | 0.9436 (+0.1159) | 0.6785 (+0.1897) | 0.9329 (+0.1235) | 0.7195 (+0.1051) | 0.5204 (+0.2026) | 0.3750 (+0.0398) | 0.9480 (+0.0794) | 0.6780 (+0.1034) |
624
+ | 0.6750 | 189054 | 0.9304 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
625
+ | 0.7000 | 196056 | 0.9182 | 0.6734 | 0.7061 (+0.1657) | 0.3774 (+0.0523) | 0.7877 (+0.2871) | 0.5269 (+0.0895) | 0.5726 (-0.1212) | 0.7434 (+0.0335) | 0.9486 (+0.1209) | 0.6663 (+0.1775) | 0.9414 (+0.1320) | 0.7109 (+0.0965) | 0.5032 (+0.1855) | 0.3767 (+0.0416) | 0.9527 (+0.0841) | 0.6780 (+0.1035) |
626
+ | 0.7250 | 203058 | 0.9105 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
627
+ | 0.7500 | 210060 | 0.9006 | 0.6761 | 0.7077 (+0.1673) | 0.3643 (+0.0393) | 0.7947 (+0.2941) | 0.5503 (+0.1129) | 0.5676 (-0.1262) | 0.7360 (+0.0261) | 0.9515 (+0.1237) | 0.6716 (+0.1827) | 0.9415 (+0.1320) | 0.7142 (+0.0998) | 0.5285 (+0.2107) | 0.3721 (+0.0370) | 0.9510 (+0.0823) | 0.6808 (+0.1063) |
628
+ | 0.7750 | 217062 | 0.8912 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
629
+ | 0.8000 | 224064 | 0.8840 | 0.6350 | 0.7071 (+0.1667) | 0.3827 (+0.0577) | 0.7847 (+0.2841) | 0.5194 (+0.0819) | 0.5692 (-0.1246) | 0.7333 (+0.0234) | 0.9427 (+0.1150) | 0.6952 (+0.2064) | 0.9408 (+0.1314) | 0.7113 (+0.0970) | 0.5328 (+0.2150) | 0.3606 (+0.0254) | 0.9505 (+0.0819) | 0.6793 (+0.1047) |
630
+ | 0.8250 | 231066 | 0.8762 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
631
+ | 0.8500 | 238068 | 0.8673 | 0.6276 | 0.7218 (+0.1814) | 0.3709 (+0.0458) | 0.7783 (+0.2777) | 0.5525 (+0.1151) | 0.5689 (-0.1249) | 0.7517 (+0.0417) | 0.9411 (+0.1134) | 0.6744 (+0.1856) | 0.9427 (+0.1332) | 0.7094 (+0.0951) | 0.5338 (+0.2161) | 0.3612 (+0.0261) | 0.9544 (+0.0857) | 0.6816 (+0.1071) |
632
+ | 0.8750 | 245070 | 0.8583 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
633
+ | 0.9000 | 252072 | 0.8544 | 0.6251 | 0.7240 (+0.1836) | 0.3695 (+0.0445) | 0.7761 (+0.2755) | 0.5362 (+0.0987) | 0.5672 (-0.1267) | 0.7364 (+0.0265) | 0.9449 (+0.1172) | 0.6903 (+0.2015) | 0.9518 (+0.1424) | 0.7196 (+0.1053) | 0.5362 (+0.2185) | 0.3648 (+0.0296) | 0.9539 (+0.0852) | 0.6824 (+0.1078) |
634
+ | 0.9250 | 259074 | 0.8491 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
635
+ | **0.9501** | **266076** | **0.8423** | **0.6143** | **0.7129 (+0.1724)** | **0.3796 (+0.0546)** | **0.7821 (+0.2814)** | **0.5496 (+0.1122)** | **0.5681 (-0.1257)** | **0.7443 (+0.0344)** | **0.9451 (+0.1173)** | **0.6738 (+0.1849)** | **0.9513 (+0.1419)** | **0.7160 (+0.1017)** | **0.5380 (+0.2203)** | **0.3594 (+0.0243)** | **0.9526 (+0.0839)** | **0.6825 (+0.1080)** |
636
+ | 0.9751 | 273078 | 0.8392 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
637
+ | 1.0 | 280065 | - | 0.6013 | 0.7111 (+0.1707) | 0.3777 (+0.0526) | 0.7873 (+0.2866) | 0.5401 (+0.1027) | 0.5663 (-0.1275) | 0.7433 (+0.0334) | 0.9501 (+0.1224) | 0.6787 (+0.1898) | 0.9512 (+0.1418) | 0.7178 (+0.1034) | 0.5251 (+0.2074) | 0.3569 (+0.0218) | 0.9538 (+0.0852) | 0.6815 (+0.1069) |
638
+
639
+ * The bold row denotes the saved checkpoint (step 266076, epoch 0.9501), which is not the final training step.
640
+
641
+ ### Training Time
642
+ - **Training**: 5.2 hours
643
+ - **Evaluation**: 8.5 minutes
644
+ - **Total**: 5.3 hours
645
+
646
+ ### Framework Versions
647
+ - Python: 3.11.15
648
+ - Sentence Transformers: 5.4.1
649
+ - Transformers: 5.7.0
650
+ - PyTorch: 2.7.0+cu126
651
+ - Accelerate: 1.13.0
652
+ - Datasets: 4.8.5
653
+ - Tokenizers: 0.22.2
654
+
655
+ ## Citation
656
+
657
+ ### BibTeX
658
+
659
+ #### Sentence Transformers
660
+ ```bibtex
661
+ @inproceedings{reimers-2019-sentence-bert,
662
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
663
+ author = "Reimers, Nils and Gurevych, Iryna",
664
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
665
+ month = "11",
666
+ year = "2019",
667
+ publisher = "Association for Computational Linguistics",
668
+ url = "https://arxiv.org/abs/1908.10084",
669
+ }
670
+ ```
671
+
672
+ <!--
673
+ ## Glossary
674
+
675
+ *Clearly define terms in order to be accessible across audiences.*
676
+ -->
677
+
678
+ <!--
679
+ ## Model Card Authors
680
+
681
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
682
+ -->
683
+
684
+ <!--
685
+ ## Model Card Contact
686
+
687
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
688
+ -->
config.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ModernBertModel"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": null,
8
+ "causal_mask": false,
9
+ "classifier_activation": "gelu",
10
+ "classifier_bias": false,
11
+ "classifier_dropout": 0.0,
12
+ "classifier_pooling": "mean",
13
+ "cls_token_id": 50281,
14
+ "decoder_bias": true,
15
+ "deterministic_flash_attn": false,
16
+ "dtype": "float32",
17
+ "embedding_dropout": 0.0,
18
+ "eos_token_id": null,
19
+ "global_attn_every_n_layers": 3,
20
+ "gradient_checkpointing": false,
21
+ "hidden_activation": "gelu",
22
+ "hidden_size": 384,
23
+ "id2label": {
24
+ "0": "LABEL_0"
25
+ },
26
+ "initializer_cutoff_factor": 2.0,
27
+ "initializer_range": 0.02,
28
+ "intermediate_size": 576,
29
+ "is_causal": false,
30
+ "label2id": {
31
+ "LABEL_0": 0
32
+ },
33
+ "layer_norm_eps": 1e-05,
34
+ "layer_types": [
35
+ "full_attention",
36
+ "sliding_attention",
37
+ "sliding_attention",
38
+ "full_attention",
39
+ "sliding_attention",
40
+ "sliding_attention",
41
+ "full_attention",
42
+ "sliding_attention",
43
+ "sliding_attention",
44
+ "full_attention"
45
+ ],
46
+ "local_attention": 128,
47
+ "max_position_embeddings": 7999,
48
+ "mlp_bias": false,
49
+ "mlp_dropout": 0.0,
50
+ "model_type": "modernbert",
51
+ "norm_bias": false,
52
+ "norm_eps": 1e-05,
53
+ "num_attention_heads": 6,
54
+ "num_hidden_layers": 10,
55
+ "pad_token_id": 50283,
56
+ "position_embedding_type": "sans_pos",
57
+ "rope_parameters": {
58
+ "full_attention": {
59
+ "rope_theta": 160000.0,
60
+ "rope_type": "default"
61
+ },
62
+ "sliding_attention": {
63
+ "rope_theta": 160000.0,
64
+ "rope_type": "default"
65
+ }
66
+ },
67
+ "sep_token_id": 50282,
68
+ "sparse_pred_ignore_index": -100,
69
+ "sparse_prediction": false,
70
+ "tie_word_embeddings": true,
71
+ "transformers_version": "5.7.0",
72
+ "vocab_size": 50368
73
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "pytorch": "2.7.0+cu126",
4
+ "sentence_transformers": "5.4.1",
5
+ "transformers": "5.7.0"
6
+ },
7
+ "activation_fn": "torch.nn.modules.linear.Identity",
8
+ "default_prompt_name": null,
9
+ "model_type": "CrossEncoder",
10
+ "prompts": {}
11
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fb540f3ad6cd4d4e4c5cfe3be48586f1fd50ae80578f2d430d1b74f5d52d95e
3
+ size 127538496
modules.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.base.modules.transformer.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.sentence_transformer.modules.pooling.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Dense",
18
+ "type": "sentence_transformers.base.modules.dense.Dense"
19
+ },
20
+ {
21
+ "idx": 3,
22
+ "name": "3",
23
+ "path": "3_LayerNorm",
24
+ "type": "sentence_transformers.sentence_transformer.modules.layer_norm.LayerNorm"
25
+ },
26
+ {
27
+ "idx": 4,
28
+ "name": "4",
29
+ "path": "4_Dense",
30
+ "type": "sentence_transformers.base.modules.dense.Dense"
31
+ }
32
+ ]
sentence_bert_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "transformer_task": "feature-extraction",
3
+ "modality_config": {
4
+ "text": {
5
+ "method": "forward",
6
+ "method_output_name": "last_hidden_state"
7
+ }
8
+ },
9
+ "module_output_name": "token_embeddings"
10
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "clean_up_tokenization_spaces": true,
4
+ "cls_token": "[CLS]",
5
+ "is_local": true,
6
+ "local_files_only": false,
7
+ "mask_token": "[MASK]",
8
+ "max_length": 7999,
9
+ "model_input_names": [
10
+ "input_ids",
11
+ "attention_mask"
12
+ ],
13
+ "model_max_length": 7999,
14
+ "pad_token": "[PAD]",
15
+ "sep_token": "[SEP]",
16
+ "stride": 0,
17
+ "tokenizer_class": "TokenizersBackend",
18
+ "truncation_side": "right",
19
+ "truncation_strategy": "longest_first",
20
+ "unk_token": "[UNK]"
21
+ }