Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks
Paper: arXiv:1908.10084
This is a Cross Encoder model finetuned from FacebookAI/roberta-base on the weak-labels-wiki dataset using the sentence-transformers library. It computes scores for pairs of texts, which can be used for text reranking and semantic search.
First install the Sentence Transformers library:
pip install -U sentence-transformers
Then you can load this model and run inference.
from sentence_transformers import CrossEncoder
# Download from the 🤗 Hub
model = CrossEncoder("cross_encoder_model_id")
# Get scores for pairs of texts
pairs = [
['q247925', 'linux__Watchdog_timer_P0020'],
['q216149', 'smartphones__Samsung_Galaxy_Z_Flip_P0003'],
['q174658', 'great_depression__Great_Depression_P0184'],
['q062615', 'barack_obama__2008_Democratic_Party_vice_presidential_candidate_selection_P0000'],
['q054198', 'donald_trump__Business_projects_of_Donald_Trump_in_Russia_P0007'],
]
scores = model.predict(pairs)
print(scores.shape)
# (5,)
# Or rank different texts based on similarity to a single text
ranks = model.rank(
'q247925',
[
'linux__Watchdog_timer_P0020',
'smartphones__Samsung_Galaxy_Z_Flip_P0003',
'great_depression__Great_Depression_P0184',
'barack_obama__2008_Democratic_Party_vice_presidential_candidate_selection_P0000',
'donald_trump__Business_projects_of_Donald_Trump_in_Russia_P0007',
]
)
# [{'corpus_id': ..., 'score': ...}, {'corpus_id': ..., 'score': ...}, ...]
The training dataset contains the following columns: `query_id`, `pos_doc_id`, `neg_doc_id`, `neg_kind`, `sentence_0_input_ids`, `sentence_0_attention_mask`, `sentence_1_input_ids`, and `sentence_1_attention_mask`.

| | query_id | pos_doc_id | neg_doc_id | neg_kind | sentence_0_input_ids | sentence_0_attention_mask | sentence_1_input_ids | sentence_1_attention_mask |
|---|---|---|---|---|---|---|---|---|
| type | string | string | string | string | torch.Tensor | torch.Tensor | torch.Tensor | torch.Tensor |

Sample rows (tensor values truncated for display):

| query_id | pos_doc_id | neg_doc_id | neg_kind | sentence_0_input_ids | sentence_0_attention_mask | sentence_1_input_ids | sentence_1_attention_mask |
|---|---|---|---|---|---|---|---|
| q187533 | deep_learning__Deep_Learning_Super_Sampling_P0000 | deep_learning__Neural_processing_unit_P0000 | hard | tensor([0, 35166, 2239, 18520, 998, 2, 2, 35166, 13807, 1582, ...]) | tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...]) | tensor([0, 35166, 2239, 18520, 998, 2, 2, 250, 26739, 5774, ...]) | tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...]) |
| q248783 | linux__64-bit_computing_P0061 | linux__Microkernel_P0009 | hard | tensor([0, 11828, 1322, 15826, 8, 32469, 13, 709, 6216, 2, ...]) | tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...]) | tensor([0, 11828, 1322, 15826, 8, 32469, 13, 709, 6216, 2, ...]) | tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...]) |
| q091993 | youtube__Amazon_Prime_Video_P0005 | napoleon__Horrible_Histories__2009_TV_series__P0063 | hard | tensor([0, 10836, 38, 4016, 10, 623, 1771, 3082, 6717, 15, ...]) | tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...]) | tensor([0, 10836, 38, 4016, 10, 623, 1771, 3082, 6717, 15, ...]) | tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...]) |
Loss: `main.PairwiseMarginLoss`

The evaluation dataset contains the following columns: `query_id`, `pos_doc_id`, `neg_doc_id`, `neg_kind`, `sentence_0_input_ids`, `sentence_0_attention_mask`, `sentence_1_input_ids`, and `sentence_1_attention_mask`.

| | query_id | pos_doc_id | neg_doc_id | neg_kind | sentence_0_input_ids | sentence_0_attention_mask | sentence_1_input_ids | sentence_1_attention_mask |
|---|---|---|---|---|---|---|---|---|
| type | string | string | string | string | torch.Tensor | torch.Tensor | torch.Tensor | torch.Tensor |

Sample rows (tensor values truncated for display):

| query_id | pos_doc_id | neg_doc_id | neg_kind | sentence_0_input_ids | sentence_0_attention_mask | sentence_1_input_ids | sentence_1_attention_mask |
|---|---|---|---|---|---|---|---|
| q247925 | linux__Watchdog_timer_P0020 | linux__Dd__Unix__P0017 | hard | tensor([0, 6179, 473, 15826, 3679, 467, 12328, 8, 414, 2752, ...]) | tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...]) | tensor([0, 6179, 473, 15826, 3679, 467, 12328, 8, 414, 2752, ...]) | tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...]) |
| q216149 | smartphones__Samsung_Galaxy_Z_Flip_P0003 | smartphones__Samsung_Galaxy_Z_Fold_4_P0000 | hard | tensor([0, 23242, 1851, 5, 806, 639, 14789, 868, 7466, 2, ...]) | tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...]) | tensor([0, 23242, 1851, 5, 806, 639, 14789, 868, 7466, 2, ...]) | tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...]) |
| q174658 | great_depression__Great_Depression_P0184 | great_depression__The_Great_Depression__America__1929_1941_P0003 | hard | tensor([0, 19065, 23384, 5984, 1915, 998, 2, 2, 38261, 9, ...]) | tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...]) | tensor([0, 19065, 23384, 5984, 1915, 998, 2, 2, 42425, 49, ...]) | tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...]) |
Loss: `main.PairwiseMarginLoss`

### Training Hyperparameters

Non-default hyperparameters:

- eval_strategy: steps
- per_device_train_batch_size: 290
- per_device_eval_batch_size: 128
- learning_rate: 1e-05
- weight_decay: 0.02
- max_grad_norm: 0.5
- num_train_epochs: 10.0
- lr_scheduler_type: cosine
- warmup_ratio: 0.03
- bf16: True
- dataloader_num_workers: 6
- remove_unused_columns: False
- load_best_model_at_end: True
- optim: adamw_torch
- dataloader_persistent_workers: True

All hyperparameters:

- overwrite_output_dir: False
- do_predict: False
- eval_strategy: steps
- prediction_loss_only: True
- per_device_train_batch_size: 290
- per_device_eval_batch_size: 128
- per_gpu_train_batch_size: None
- per_gpu_eval_batch_size: None
- gradient_accumulation_steps: 1
- eval_accumulation_steps: None
- torch_empty_cache_steps: None
- learning_rate: 1e-05
- weight_decay: 0.02
- adam_beta1: 0.9
- adam_beta2: 0.999
- adam_epsilon: 1e-08
- max_grad_norm: 0.5
- num_train_epochs: 10.0
- max_steps: -1
- lr_scheduler_type: cosine
- lr_scheduler_kwargs: {}
- warmup_ratio: 0.03
- warmup_steps: 0
- log_level: passive
- log_level_replica: warning
- log_on_each_node: True
- logging_nan_inf_filter: True
- save_safetensors: True
- save_on_each_node: False
- save_only_model: False
- restore_callback_states_from_checkpoint: False
- no_cuda: False
- use_cpu: False
- use_mps_device: False
- seed: 42
- data_seed: None
- jit_mode_eval: False
- bf16: True
- fp16: False
- fp16_opt_level: O1
- half_precision_backend: auto
- bf16_full_eval: False
- fp16_full_eval: False
- tf32: None
- local_rank: 0
- ddp_backend: None
- tpu_num_cores: None
- tpu_metrics_debug: False
- debug: []
- dataloader_drop_last: False
- dataloader_num_workers: 6
- dataloader_prefetch_factor: None
- past_index: -1
- disable_tqdm: False
- remove_unused_columns: False
- label_names: None
- load_best_model_at_end: True
- ignore_data_skip: False
- fsdp: []
- fsdp_min_num_params: 0
- fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- fsdp_transformer_layer_cls_to_wrap: None
- accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- parallelism_config: None
- deepspeed: None
- label_smoothing_factor: 0.0
- optim: adamw_torch
- optim_args: None
- adafactor: False
- group_by_length: False
- length_column_name: length
- project: huggingface
- trackio_space_id: trackio
- ddp_find_unused_parameters: None
- ddp_bucket_cap_mb: None
- ddp_broadcast_buffers: False
- dataloader_pin_memory: True
- dataloader_persistent_workers: True
- skip_memory_metrics: True
- use_legacy_prediction_loop: False
- push_to_hub: False
- resume_from_checkpoint: None
- hub_model_id: None
- hub_strategy: every_save
- hub_private_repo: None
- hub_always_push: False
- hub_revision: None
- gradient_checkpointing: False
- gradient_checkpointing_kwargs: None
- include_inputs_for_metrics: False
- include_for_metrics: []
- eval_do_concat_batches: True
- fp16_backend: auto
- push_to_hub_model_id: None
- push_to_hub_organization: None
- mp_parameters:
- auto_find_batch_size: False
- full_determinism: False
- torchdynamo: None
- ray_scope: last
- ddp_timeout: 1800
- torch_compile: False
- torch_compile_backend: None
- torch_compile_mode: None
- include_tokens_per_second: False
- include_num_input_tokens_seen: no
- neftune_noise_alpha: None
- optim_target_modules: None
- batch_eval_metrics: False
- eval_on_start: False
- use_liger_kernel: False
- liger_kernel_config: None
- eval_use_gather_object: False
- average_tokens_across_devices: True
- prompts: None
- batch_sampler: batch_sampler
- multi_dataset_batch_sampler: proportional
- router_mapping: {}
- learning_rate_mapping: {}

### Training Logs

| Epoch | Step | Training Loss | Validation Loss |
|---|---|---|---|
| 0.0067 | 50 | 1.0015 | - |
| 0.0135 | 100 | 1.0 | - |
| 0.0202 | 150 | 0.9995 | - |
| 0.0269 | 200 | 0.9982 | - |
| 0.0337 | 250 | 0.9965 | - |
| 0.0404 | 300 | 0.9942 | - |
| 0.0472 | 350 | 0.9858 | - |
| 0.0539 | 400 | 0.9546 | - |
| 0.0606 | 450 | 0.8242 | - |
| 0.0674 | 500 | 0.7001 | 0.5555 |
| 0.0741 | 550 | 0.5356 | - |
| 0.0808 | 600 | 0.4442 | - |
| 0.0876 | 650 | 0.3845 | - |
| 0.0943 | 700 | 0.3461 | - |
| 0.1010 | 750 | 0.3281 | - |
| 0.1078 | 800 | 0.3069 | - |
| 0.1145 | 850 | 0.2846 | - |
| 0.1212 | 900 | 0.2713 | - |
| 0.1280 | 950 | 0.255 | - |
| 0.1347 | 1000 | 0.2473 | 0.1868 |
| 0.1415 | 1050 | 0.2319 | - |
| 0.1482 | 1100 | 0.22 | - |
| 0.1549 | 1150 | 0.2166 | - |
| 0.1617 | 1200 | 0.2087 | - |
| 0.1684 | 1250 | 0.1947 | - |
| 0.1751 | 1300 | 0.1929 | - |
| 0.1819 | 1350 | 0.1915 | - |
| 0.1886 | 1400 | 0.1923 | - |
| 0.1953 | 1450 | 0.1847 | - |
| 0.2021 | 1500 | 0.179 | 0.1380 |
| 0.2088 | 1550 | 0.1783 | - |
| 0.2155 | 1600 | 0.1691 | - |
| 0.2223 | 1650 | 0.1688 | - |
| 0.2290 | 1700 | 0.1626 | - |
| 0.2358 | 1750 | 0.1585 | - |
| 0.2425 | 1800 | 0.1579 | - |
| 0.2492 | 1850 | 0.1576 | - |
| 0.2560 | 1900 | 0.1466 | - |
| 0.2627 | 1950 | 0.1518 | - |
| 0.2694 | 2000 | 0.1612 | 0.1121 |
| 0.2762 | 2050 | 0.1465 | - |
| 0.2829 | 2100 | 0.1395 | - |
| 0.2896 | 2150 | 0.1372 | - |
| 0.2964 | 2200 | 0.1323 | - |
| 0.3031 | 2250 | 0.1312 | - |
| 0.3098 | 2300 | 0.1324 | - |
| 0.3166 | 2350 | 0.1329 | - |
| 0.3233 | 2400 | 0.1284 | - |
| 0.3301 | 2450 | 0.123 | - |
| 0.3368 | 2500 | 0.1144 | 0.0986 |
| 0.3435 | 2550 | 0.1214 | - |
| 0.3503 | 2600 | 0.1283 | - |
| 0.3570 | 2650 | 0.1207 | - |
| 0.3637 | 2700 | 0.1196 | - |
| 0.3705 | 2750 | 0.1155 | - |
| 0.3772 | 2800 | 0.1112 | - |
| 0.3839 | 2850 | 0.1202 | - |
| 0.3907 | 2900 | 0.1117 | - |
| 0.3974 | 2950 | 0.1063 | - |
| 0.4041 | 3000 | 0.111 | 0.0829 |
| 0.4109 | 3050 | 0.108 | - |
| 0.4176 | 3100 | 0.1094 | - |
| 0.4244 | 3150 | 0.1072 | - |
| 0.4311 | 3200 | 0.0976 | - |
| 0.4378 | 3250 | 0.1076 | - |
| 0.4446 | 3300 | 0.1049 | - |
| 0.4513 | 3350 | 0.1109 | - |
| 0.4580 | 3400 | 0.1043 | - |
| 0.4648 | 3450 | 0.1021 | - |
| 0.4715 | 3500 | 0.1034 | 0.0759 |
| 0.4782 | 3550 | 0.1036 | - |
| 0.4850 | 3600 | 0.0975 | - |
| 0.4917 | 3650 | 0.097 | - |
| 0.4985 | 3700 | 0.0943 | - |
| 0.5052 | 3750 | 0.0911 | - |
| 0.5119 | 3800 | 0.0974 | - |
| 0.5187 | 3850 | 0.0937 | - |
| 0.5254 | 3900 | 0.0934 | - |
| 0.5321 | 3950 | 0.0931 | - |
| 0.5389 | 4000 | 0.0884 | 0.0655 |
| 0.5456 | 4050 | 0.0868 | - |
| 0.5523 | 4100 | 0.0918 | - |
| 0.5591 | 4150 | 0.0887 | - |
| 0.5658 | 4200 | 0.0879 | - |
| 0.5725 | 4250 | 0.0863 | - |
| 0.5793 | 4300 | 0.0886 | - |
| 0.5860 | 4350 | 0.0865 | - |
| 0.5928 | 4400 | 0.0839 | - |
| 0.5995 | 4450 | 0.089 | - |
| 0.6062 | 4500 | 0.0806 | 0.0617 |
| 0.6130 | 4550 | 0.0839 | - |
| 0.6197 | 4600 | 0.084 | - |
| 0.6264 | 4650 | 0.0836 | - |
| 0.6332 | 4700 | 0.0753 | - |
| 0.6399 | 4750 | 0.0779 | - |
| 0.6466 | 4800 | 0.0825 | - |
| 0.6534 | 4850 | 0.082 | - |
| 0.6601 | 4900 | 0.0818 | - |
| 0.6668 | 4950 | 0.078 | - |
| 0.6736 | 5000 | 0.0731 | 0.0581 |
| 0.6803 | 5050 | 0.0756 | - |
| 0.6871 | 5100 | 0.0758 | - |
| 0.6938 | 5150 | 0.0738 | - |
| 0.7005 | 5200 | 0.0715 | - |
| 0.7073 | 5250 | 0.0725 | - |
| 0.7140 | 5300 | 0.0718 | - |
| 0.7207 | 5350 | 0.0755 | - |
| 0.7275 | 5400 | 0.071 | - |
| 0.7342 | 5450 | 0.0723 | - |
| 0.7409 | 5500 | 0.0766 | 0.0520 |
| 0.7477 | 5550 | 0.0708 | - |
| 0.7544 | 5600 | 0.0766 | - |
| 0.7611 | 5650 | 0.0692 | - |
| 0.7679 | 5700 | 0.0731 | - |
| 0.7746 | 5750 | 0.066 | - |
| 0.7814 | 5800 | 0.0733 | - |
| 0.7881 | 5850 | 0.0649 | - |
| 0.7948 | 5900 | 0.0678 | - |
| 0.8016 | 5950 | 0.0707 | - |
| 0.8083 | 6000 | 0.0722 | 0.0515 |
| 0.8150 | 6050 | 0.0671 | - |
| 0.8218 | 6100 | 0.0724 | - |
| 0.8285 | 6150 | 0.0691 | - |
| 0.8352 | 6200 | 0.0761 | - |
| 0.8420 | 6250 | 0.0653 | - |
| 0.8487 | 6300 | 0.0629 | - |
| 0.8554 | 6350 | 0.0647 | - |
| 0.8622 | 6400 | 0.0675 | - |
| 0.8689 | 6450 | 0.0661 | - |
| 0.8757 | 6500 | 0.0622 | 0.0457 |
| 0.8824 | 6550 | 0.0646 | - |
| 0.8891 | 6600 | 0.0626 | - |
| 0.8959 | 6650 | 0.0672 | - |
| 0.9026 | 6700 | 0.0628 | - |
| 0.9093 | 6750 | 0.0624 | - |
| 0.9161 | 6800 | 0.0637 | - |
| 0.9228 | 6850 | 0.0632 | - |
| 0.9295 | 6900 | 0.0632 | - |
| 0.9363 | 6950 | 0.0648 | - |
| 0.9430 | 7000 | 0.0628 | 0.0427 |
| 0.9498 | 7050 | 0.0592 | - |
| 0.9565 | 7100 | 0.0619 | - |
| 0.9632 | 7150 | 0.0602 | - |
| 0.9700 | 7200 | 0.0605 | - |
| 0.9767 | 7250 | 0.0582 | - |
| 0.9834 | 7300 | 0.0612 | - |
| 0.9902 | 7350 | 0.0593 | - |
| 0.9969 | 7400 | 0.0594 | - |
| 1.0036 | 7450 | 0.0574 | - |
| 1.0104 | 7500 | 0.0506 | 0.0388 |
| 1.0171 | 7550 | 0.0467 | - |
| 1.0238 | 7600 | 0.0496 | - |
| 1.0306 | 7650 | 0.0485 | - |
| 1.0373 | 7700 | 0.0489 | - |
| 1.0441 | 7750 | 0.054 | - |
| 1.0508 | 7800 | 0.0489 | - |
| 1.0575 | 7850 | 0.051 | - |
| 1.0643 | 7900 | 0.0497 | - |
| 1.0710 | 7950 | 0.0493 | - |
| 1.0777 | 8000 | 0.0519 | 0.0409 |
| 1.0845 | 8050 | 0.048 | - |
| 1.0912 | 8100 | 0.0468 | - |
| 1.0979 | 8150 | 0.0543 | - |
| 1.1047 | 8200 | 0.0444 | - |
| 1.1114 | 8250 | 0.0507 | - |
| 1.1181 | 8300 | 0.0468 | - |
| 1.1249 | 8350 | 0.0506 | - |
| 1.1316 | 8400 | 0.0498 | - |
| 1.1384 | 8450 | 0.0472 | - |
| 1.1451 | 8500 | 0.0467 | 0.0380 |
| 1.1518 | 8550 | 0.0446 | - |
| 1.1586 | 8600 | 0.0492 | - |
| 1.1653 | 8650 | 0.0445 | - |
| 1.1720 | 8700 | 0.0487 | - |
| 1.1788 | 8750 | 0.0424 | - |
| 1.1855 | 8800 | 0.0446 | - |
| 1.1922 | 8850 | 0.0471 | - |
| 1.1990 | 8900 | 0.0516 | - |
| 1.2057 | 8950 | 0.0493 | - |
| 1.2124 | 9000 | 0.0482 | 0.0374 |
| 1.2192 | 9050 | 0.0442 | - |
| 1.2259 | 9100 | 0.0441 | - |
| 1.2327 | 9150 | 0.0471 | - |
| 1.2394 | 9200 | 0.047 | - |
| 1.2461 | 9250 | 0.0445 | - |
| 1.2529 | 9300 | 0.0468 | - |
| 1.2596 | 9350 | 0.0493 | - |
| 1.2663 | 9400 | 0.0453 | - |
| 1.2731 | 9450 | 0.0478 | - |
| 1.2798 | 9500 | 0.0417 | 0.0352 |
| 1.2865 | 9550 | 0.0452 | - |
| 1.2933 | 9600 | 0.0438 | - |
| 1.3000 | 9650 | 0.0419 | - |
| 1.3067 | 9700 | 0.0499 | - |
| 1.3135 | 9750 | 0.0414 | - |
| 1.3202 | 9800 | 0.0429 | - |
| 1.3270 | 9850 | 0.0443 | - |
| 1.3337 | 9900 | 0.0403 | - |
| 1.3404 | 9950 | 0.0466 | - |
| 1.3472 | 10000 | 0.0439 | 0.0323 |
| 1.3539 | 10050 | 0.0469 | - |
| 1.3606 | 10100 | 0.0459 | - |
| 1.3674 | 10150 | 0.0441 | - |
| 1.3741 | 10200 | 0.0428 | - |
| 1.3808 | 10250 | 0.0396 | - |
| 1.3876 | 10300 | 0.0406 | - |
| 1.3943 | 10350 | 0.0433 | - |
| 1.4011 | 10400 | 0.0421 | - |
| 1.4078 | 10450 | 0.0438 | - |
| 1.4145 | 10500 | 0.041 | 0.0310 |
| 1.4213 | 10550 | 0.0406 | - |
| 1.4280 | 10600 | 0.0386 | - |
| 1.4347 | 10650 | 0.0453 | - |
| 1.4415 | 10700 | 0.0429 | - |
| 1.4482 | 10750 | 0.0421 | - |
| 1.4549 | 10800 | 0.0385 | - |
| 1.4617 | 10850 | 0.0442 | - |
| 1.4684 | 10900 | 0.0402 | - |
| 1.4751 | 10950 | 0.0416 | - |
| 1.4819 | 11000 | 0.0433 | 0.0311 |
| 1.4886 | 11050 | 0.0404 | - |
| 1.4954 | 11100 | 0.0424 | - |
| 1.5021 | 11150 | 0.0373 | - |
| 1.5088 | 11200 | 0.0432 | - |
| 1.5156 | 11250 | 0.0364 | - |
| 1.5223 | 11300 | 0.0414 | - |
| 1.5290 | 11350 | 0.0409 | - |
| 1.5358 | 11400 | 0.0397 | - |
| 1.5425 | 11450 | 0.0425 | - |
| 1.5492 | 11500 | 0.0407 | 0.0308 |
| 1.5560 | 11550 | 0.0388 | - |
| 1.5627 | 11600 | 0.0396 | - |
| 1.5694 | 11650 | 0.037 | - |
| 1.5762 | 11700 | 0.0422 | - |
| 1.5829 | 11750 | 0.0406 | - |
| 1.5897 | 11800 | 0.0428 | - |
| 1.5964 | 11850 | 0.0387 | - |
| 1.6031 | 11900 | 0.0398 | - |
| 1.6099 | 11950 | 0.0405 | - |
| 1.6166 | 12000 | 0.0352 | 0.0297 |
| 1.6233 | 12050 | 0.0418 | - |
| 1.6301 | 12100 | 0.0374 | - |
| 1.6368 | 12150 | 0.0393 | - |
| 1.6435 | 12200 | 0.037 | - |
| 1.6503 | 12250 | 0.0402 | - |
| 1.6570 | 12300 | 0.0358 | - |
| 1.6637 | 12350 | 0.0331 | - |
| 1.6705 | 12400 | 0.0413 | - |
| 1.6772 | 12450 | 0.0382 | - |
| 1.6840 | 12500 | 0.0409 | 0.0257 |
| 1.6907 | 12550 | 0.0388 | - |
| 1.6974 | 12600 | 0.0348 | - |
| 1.7042 | 12650 | 0.0345 | - |
| 1.7109 | 12700 | 0.0338 | - |
| 1.7176 | 12750 | 0.041 | - |
| 1.7244 | 12800 | 0.0369 | - |
| 1.7311 | 12850 | 0.0354 | - |
| 1.7378 | 12900 | 0.0384 | - |
| 1.7446 | 12950 | 0.0362 | - |
| 1.7513 | 13000 | 0.0336 | 0.0273 |
| 1.7580 | 13050 | 0.038 | - |
| 1.7648 | 13100 | 0.0387 | - |
| 1.7715 | 13150 | 0.0399 | - |
| 1.7783 | 13200 | 0.0355 | - |
| 1.7850 | 13250 | 0.0368 | - |
| 1.7917 | 13300 | 0.037 | - |
| 1.7985 | 13350 | 0.0347 | - |
| 1.8052 | 13400 | 0.0331 | - |
| 1.8119 | 13450 | 0.0362 | - |
| 1.8187 | 13500 | 0.0378 | 0.0258 |
@inproceedings{reimers-2019-sentence-bert,
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
author = "Reimers, Nils and Gurevych, Iryna",
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
month = "11",
year = "2019",
publisher = "Association for Computational Linguistics",
url = "https://arxiv.org/abs/1908.10084",
}
Base model
FacebookAI/roberta-base