metadata

base_model: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
datasets: []
language: []
library_name: sentence-transformers
metrics:
  - cosine_accuracy
  - cosine_accuracy_threshold
  - cosine_f1
  - cosine_f1_threshold
  - cosine_precision
  - cosine_recall
  - cosine_ap
  - dot_accuracy
  - dot_accuracy_threshold
  - dot_f1
  - dot_f1_threshold
  - dot_precision
  - dot_recall
  - dot_ap
  - manhattan_accuracy
  - manhattan_accuracy_threshold
  - manhattan_f1
  - manhattan_f1_threshold
  - manhattan_precision
  - manhattan_recall
  - manhattan_ap
  - euclidean_accuracy
  - euclidean_accuracy_threshold
  - euclidean_f1
  - euclidean_f1_threshold
  - euclidean_precision
  - euclidean_recall
  - euclidean_ap
  - max_accuracy
  - max_accuracy_threshold
  - max_f1
  - max_f1_threshold
  - max_precision
  - max_recall
  - max_ap
pipeline_tag: sentence-similarity
tags:
  - sentence-transformers
  - sentence-similarity
  - feature-extraction
  - generated_from_trainer
  - dataset_size:450000
  - loss:ContrastiveLoss
widget:
  - source_sentence: گوشی a 21 s
    sentences:
      - FIFA 21 اکانت قانونی FIFA 21 Standard Edition مخصوص XBOX Series S/X
      - روکش صندلی چرم طرح بی ام و مناسب پژو پارس صندلی قدیم کد BMW69
      - >-
        هایلایتر پودری وت اند وایلد مدل مگا گلو شماره E319B هایلایتر پودری مگا
        گلو وت اند وایلد مدل E319B Blossom Glow wet n wild megaglo highlighting
        powder هایلایتر پودری مگا گلو وت اند وایلد مدل E321B Precious Petals
        هایلایتر وت اند وایلد پودری مگا گلو هایلایتر پودری مگا گلو شماره 319B وت
        اند وایلد / هایلایتر پودری مگا گلو وت اند وایلد هایلایتر پودری مگا گلو
        شماره 321B وت اند وایلد هایلایتر وت‌ اند وایلد | هایلایتر پودری وت اند
        وایلید megaglot هایلایتر پودری وت اند وایلد مگا گلو
  - source_sentence: استویا
    sentences:
      - >-
        ساعت گارمین مدل GARMIN FORERUNNER 35 GREEN Smart Watch Garmin Watch
        forerunner 35 green ساعت گارمين Forerunner 35 ساعت مچی هوشمند گارمین
        forerunner 35 green
      - برگ استویا
      - >-
        تاچ و ال سی دی شیائومی ردمی مدل نوت 8 پرو تاچ و ال سی دی شیائومی REDMI
        NOTE 8 PRO تاچ و ال سی دی گوشی شائومی ردمی نوت 8 پرو LCD XIAOMI REDMI
        NOTE 8 PRO
  - source_sentence: سنباده برقی
    sentences:
      - >-
        گوشت کوب برقی سه کاره میگل مدل GHB 801 سفید گوشتکوب برقی چند کاره میگل
        غذاساز دستی GHB 801 میگل Migel GHB 801 Food Processor گوشت کوب برقی GHB
        801 سفيد ميگل غذاساز دستی میگل مشکی مدل GHB 801 گوشتکوب برقی میگل GHB
        801 غذاساز میگل مدل GHB 801 (سفید) غذاساز دستی میگل مدل GHB 801 گوشت کوب
        برقی سه کاره میگل مدل GHB 801 مشکی غذاساز میگل مدل GHB 801 غذاساز  میگل
        مدل GHB 801 غذاساز دستی میگل مدل GHB801 W غذاساز دستی میگل سفید مدل GHB
        801 گوشت کوب برقی میگل مدل GHB 801 غذاساز میگل مدل GHB 801 ا Migel GHB
        801 Food Processor گوشت کوب برقی میگل GHB801
      - >-
        کلگی شارژر دو پورت تسکو با کابل میکرو TTC 57 کلگی شارژر تسکو مدل TSCO –
        TTC57 به همراه کابل MICRO USB شارژر دیواری تسکو مدل TTC 57 با کابل
        micro-USB شارژر دیواری دو پورت تسکو مدل TTC 57 شارژر دیواری TTC 57 تسکو
        شارژر دیواری تسکو مدل ttc 57 گلد و نقره ای شارژر دیواری تسکو مدل TTC57
        شارژر 2 پورت تسکو TTC 57 tsco ttc 57 wall charger شارژر دیواری تسکو مدل
        TTC 57 به همراه کابل microUSB شارژر دیواری تسکو TSCO TTC 57 به همراه
        کابل MicroUSB شارژر دیواری تسکو مدل WALL CHARGER TTC-57 شارژر دیواری TTC
        57 به همراه کابل تبدیل microUSB Tsco TTC 57 Wall charger With MicroUSB
        Conversion Cable شارژر دیواری تسکو TTC 57 شارژر دیواری تسکو TSCO TTC 57
        + کابل شارژر دیواری تسکو مدل TTC 57 با کابل MicroUSB شارژر دیواری TTC 57
        TSCO TTC 57 Wall Charger with microUSB Cable شارژر دیواری تسکو(TSCO
        TTC57 ) شارژر دیواری تسکو مدل TTC 57 شارژر دیواری به همراه کابل تبدیل
        microUSB تسکو مدل TSCO TTC 57 + دو پورت USB TSCO TTC 57 2.4A Wall
        Charger TSCO TTC 57 Wall Charger شارژر دیواری تسکو مدلTTC57  به همراه
        کابل شارژ شارژر دیواری TSCO TTC57 + کابل میکرو یو اس بی شارژر دیواری
        تسکو مدل TTC 57 به همراه با کابل microUSB Tsco TTC57 CHarger Fast with
        Micro Cable شارژر دیواری TSCO TTC57  کابل میکرو یو اس بی گارانتی یکساله
        شارژر TSCO مدل TTC 57 به همراه کابل MICRO شارژر دیواری سه پورت تسکو مدل
        TTC57 شارژر دیواری تسکو مدل TTC 57 به همراه کابل شارژ  microUSB شارژر
        دیواری تسکو مدل TSCO TTC 57 به همراه کابل تبدیل microUSB WALL CHARGER
        TTC 57 شارژر دیواری تسکو مدل TTC 57 به همراه کابل تبدیل microUSB TSCO
        TTC 57 شارژر دیواری تسکوttc 57
      - سنباده برقی ایکس کورت XSF02-180S
  - source_sentence: میز تنیس
    sentences:
      - >-
        کاندوم شیت کلوپلاست سایز ۳۰ میلی‌متر کاندوم شیت کانوین کلوپلاست کاندوم
        شیت Espi سایز ۳۰ میلی‌متر کاندوم شیت کلوپلاست مدل کانوین کاندوم شیت
        کلوپلاست کاندوم شیت کلوپلاست سایز 30 کاندوم شیت کولوپلاست 30 میلی متر
        Coloplast Freedom Clear کاندوم شیت سایز 25 میلی متر کلوپلاست coloplast
        ساخت دانمارک کاندوم شیت لاتکس کلوپلاست کاندوم شیت دانمارکی کاندوم شیت
        کلوپلاست - coloplast کاندوم شیت سایز 30 میلی متر کلوپلاست coloplast ساخت
        دانمارک کاندوم شیت کاندوم شیت کلوپلاست در سایزبندی کاندوم شیت 30میلی
        لیتر کاندوم شیت کلوپلاست coloplast دانمارکی
      - کاور گوشی سامسونگ A70 - A70S سه بعدی کد5
      - میز تنیس روی میز مدل Horse TT11
  - source_sentence: 'هندزفری بلوتوث جبرا '
    sentences:
      - آرمیچر دریل رونیکس 2210
      - >-
        هدست بلوتوث جبرا Mini هندزفری بلوتوث جبرا Jabra Mini Bluetooth Handsfree
        هدست بلوتوث جبرا  مدل Mini هندزفری بلوتوث جبرا MINI
      - گاز پیک نیک 5 کیلویی شیدا گاز
model-index:
  - name: >-
      SentenceTransformer based on
      sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
    results:
      - task:
          type: binary-classification
          name: Binary Classification
        dataset:
          name: Unknown
          type: unknown
        metrics:
          - type: cosine_accuracy
            value: 0.85498
            name: Cosine Accuracy
          - type: cosine_accuracy_threshold
            value: 0.7729779481887817
            name: Cosine Accuracy Threshold
          - type: cosine_f1
            value: 0.8740799339616153
            name: Cosine F1
          - type: cosine_f1_threshold
            value: 0.7386565208435059
            name: Cosine F1 Threshold
          - type: cosine_precision
            value: 0.8376623376623377
            name: Cosine Precision
          - type: cosine_recall
            value: 0.9138079827400216
            name: Cosine Recall
          - type: cosine_ap
            value: 0.9043744924756869
            name: Cosine Ap
          - type: dot_accuracy
            value: 0.81168
            name: Dot Accuracy
          - type: dot_accuracy_threshold
            value: 18.684463500976562
            name: Dot Accuracy Threshold
          - type: dot_f1
            value: 0.8382417731385773
            name: Dot F1
          - type: dot_f1_threshold
            value: 18.00467300415039
            name: Dot F1 Threshold
          - type: dot_precision
            value: 0.7926547878477118
            name: Dot Precision
          - type: dot_recall
            value: 0.8893923049262855
            name: Dot Recall
          - type: dot_ap
            value: 0.8808088425591442
            name: Dot Ap
          - type: manhattan_accuracy
            value: 0.8519
            name: Manhattan Accuracy
          - type: manhattan_accuracy_threshold
            value: 54.21998596191406
            name: Manhattan Accuracy Threshold
          - type: manhattan_f1
            value: 0.8715498573540026
            name: Manhattan F1
          - type: manhattan_f1_threshold
            value: 57.27758026123047
            name: Manhattan F1 Threshold
          - type: manhattan_precision
            value: 0.8347379510139584
            name: Manhattan Precision
          - type: manhattan_recall
            value: 0.9117583603020496
            name: Manhattan Recall
          - type: manhattan_ap
            value: 0.8994757702061444
            name: Manhattan Ap
          - type: euclidean_accuracy
            value: 0.85192
            name: Euclidean Accuracy
          - type: euclidean_accuracy_threshold
            value: 3.4671199321746826
            name: Euclidean Accuracy Threshold
          - type: euclidean_f1
            value: 0.8717798493960334
            name: Euclidean F1
          - type: euclidean_f1_threshold
            value: 3.664275646209717
            name: Euclidean F1 Threshold
          - type: euclidean_precision
            value: 0.8369784601131589
            name: Euclidean Precision
          - type: euclidean_recall
            value: 0.9096008629989213
            name: Euclidean Recall
          - type: euclidean_ap
            value: 0.8996992828192123
            name: Euclidean Ap
          - type: max_accuracy
            value: 0.85498
            name: Max Accuracy
          - type: max_accuracy_threshold
            value: 54.21998596191406
            name: Max Accuracy Threshold
          - type: max_f1
            value: 0.8740799339616153
            name: Max F1
          - type: max_f1_threshold
            value: 57.27758026123047
            name: Max F1 Threshold
          - type: max_precision
            value: 0.8376623376623377
            name: Max Precision
          - type: max_recall
            value: 0.9138079827400216
            name: Max Recall
          - type: max_ap
            value: 0.9043744924756869
            name: Max Ap

SentenceTransformer based on sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2

This is a sentence-transformers model finetuned from sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2. It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

Model Details

Model Description

Model Type: Sentence Transformer
Base model: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
Maximum Sequence Length: 128 tokens
Output Dimensionality: 384 dimensions
Similarity Function: Cosine Similarity

Model Sources

Documentation: Sentence Transformers Documentation
Repository: Sentence Transformers on GitHub
Hugging Face: Sentence Transformers on Hugging Face

Full Model Architecture

SentenceTransformer(
  (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)

Usage

Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

pip install -U sentence-transformers

Then you can load this model and run inference.

from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("DashReza7/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2_FINETUNED_on_torob_data_v2_3")
# Run inference
sentences = [
    'هندزفری بلوتوث جبرا ',
    'هدست بلوتوث جبرا Mini هندزفری بلوتوث جبرا Jabra Mini Bluetooth Handsfree هدست بلوتوث جبرا  مدل Mini هندزفری بلوتوث جبرا MINI',
    'گاز پیک نیک 5 کیلویی شیدا گاز',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 384]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]

Evaluation

Metrics

Binary Classification

Evaluated with BinaryClassificationEvaluator

Metric	Value
cosine_accuracy	0.855
cosine_accuracy_threshold	0.773
cosine_f1	0.8741
cosine_f1_threshold	0.7387
cosine_precision	0.8377
cosine_recall	0.9138
cosine_ap	0.9044
dot_accuracy	0.8117
dot_accuracy_threshold	18.6845
dot_f1	0.8382
dot_f1_threshold	18.0047
dot_precision	0.7927
dot_recall	0.8894
dot_ap	0.8808
manhattan_accuracy	0.8519
manhattan_accuracy_threshold	54.22
manhattan_f1	0.8715
manhattan_f1_threshold	57.2776
manhattan_precision	0.8347
manhattan_recall	0.9118
manhattan_ap	0.8995
euclidean_accuracy	0.8519
euclidean_accuracy_threshold	3.4671
euclidean_f1	0.8718
euclidean_f1_threshold	3.6643
euclidean_precision	0.837
euclidean_recall	0.9096
euclidean_ap	0.8997
max_accuracy	0.855
max_accuracy_threshold	54.22
max_f1	0.8741
max_f1_threshold	57.2776
max_precision	0.8377
max_recall	0.9138
max_ap	0.9044

Training Details

Training Hyperparameters

Non-Default Hyperparameters

eval_strategy: steps
per_device_train_batch_size: 64
per_device_eval_batch_size: 64
learning_rate: 2e-05
num_train_epochs: 1
warmup_ratio: 0.1
fp16: True

All Hyperparameters

Click to expand

overwrite_output_dir: False
do_predict: False
eval_strategy: steps
prediction_loss_only: True
per_device_train_batch_size: 64
per_device_eval_batch_size: 64
per_gpu_train_batch_size: None
per_gpu_eval_batch_size: None
gradient_accumulation_steps: 1
eval_accumulation_steps: None
learning_rate: 2e-05
weight_decay: 0.0
adam_beta1: 0.9
adam_beta2: 0.999
adam_epsilon: 1e-08
max_grad_norm: 1.0
num_train_epochs: 1
max_steps: -1
lr_scheduler_type: linear
lr_scheduler_kwargs: {}
warmup_ratio: 0.1
warmup_steps: 0
log_level: passive
log_level_replica: warning
log_on_each_node: True
logging_nan_inf_filter: True
save_safetensors: True
save_on_each_node: False
save_only_model: False
restore_callback_states_from_checkpoint: False
no_cuda: False
use_cpu: False
use_mps_device: False
seed: 42
data_seed: None
jit_mode_eval: False
use_ipex: False
bf16: False
fp16: True
fp16_opt_level: O1
half_precision_backend: auto
bf16_full_eval: False
fp16_full_eval: False
tf32: None
local_rank: 0
ddp_backend: None
tpu_num_cores: None
tpu_metrics_debug: False
debug: []
dataloader_drop_last: False
dataloader_num_workers: 0
dataloader_prefetch_factor: None
past_index: -1
disable_tqdm: False
remove_unused_columns: True
label_names: None
load_best_model_at_end: False
ignore_data_skip: False
fsdp: []
fsdp_min_num_params: 0
fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
fsdp_transformer_layer_cls_to_wrap: None
accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
deepspeed: None
label_smoothing_factor: 0.0
optim: adamw_torch
optim_args: None
adafactor: False
group_by_length: False
length_column_name: length
ddp_find_unused_parameters: None
ddp_bucket_cap_mb: None
ddp_broadcast_buffers: False
dataloader_pin_memory: True
dataloader_persistent_workers: False
skip_memory_metrics: True
use_legacy_prediction_loop: False
push_to_hub: False
resume_from_checkpoint: None
hub_model_id: None
hub_strategy: every_save
hub_private_repo: False
hub_always_push: False
gradient_checkpointing: False
gradient_checkpointing_kwargs: None
include_inputs_for_metrics: False
eval_do_concat_batches: True
fp16_backend: auto
push_to_hub_model_id: None
push_to_hub_organization: None
mp_parameters:
auto_find_batch_size: False
full_determinism: False
torchdynamo: None
ray_scope: last
ddp_timeout: 1800
torch_compile: False
torch_compile_backend: None
torch_compile_mode: None
dispatch_batches: None
split_batches: None
include_tokens_per_second: False
include_num_input_tokens_seen: False
neftune_noise_alpha: None
optim_target_modules: None
batch_eval_metrics: False
eval_on_start: False
batch_sampler: batch_sampler
multi_dataset_batch_sampler: proportional

Training Logs

Epoch	Step	Training Loss	Validation Loss	max_ap
0.0711	500	0.0318	-	-
0.1422	1000	0.0201	-	-
0.2133	1500	0.0183	-	-
0.2844	2000	0.0171	0.0166	0.8756
0.3555	2500	0.0164	-	-
0.4266	3000	0.0161	-	-
0.4977	3500	0.0155	-	-
0.5688	4000	0.0153	0.0147	0.8955
0.6399	4500	0.015	-	-
0.7110	5000	0.0145	-	-
0.7821	5500	0.0144	-	-
0.8532	6000	0.0143	0.0138	0.9044
0.9243	6500	0.0141	-	-
0.9954	7000	0.0139	-	-

Framework Versions

Python: 3.10.12
Sentence Transformers: 3.0.1
Transformers: 4.42.4
PyTorch: 2.4.0+cu121
Accelerate: 0.32.1
Datasets: 2.21.0
Tokenizers: 0.19.1

Citation

BibTeX

Sentence Transformers

@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}

ContrastiveLoss

@inproceedings{hadsell2006dimensionality,
    author={Hadsell, R. and Chopra, S. and LeCun, Y.},
    booktitle={2006 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR'06)}, 
    title={Dimensionality Reduction by Learning an Invariant Mapping}, 
    year={2006},
    volume={2},
    number={},
    pages={1735-1742},
    doi={10.1109/CVPR.2006.100}
}