Add proper Sentence Transformers integration (#1)
Browse files- Add ST-specific configuration files with model.save() (7260df65f5d871b2743995da4cc2342b8f954844)
- Also add a transformers tag (c58c14f76620c82c5e8e307c91e8edda2eda2a72)
- Following SFR-Embedding-Mistral, set max seq length to 4096 (6cf3bfedf3c5c2d24b7730664985cdc11ffbb154)
- 1_Pooling/config.json +10 -0
- README.md +5 -4
- config_sentence_transformers.json +10 -0
- modules.json +14 -0
- sentence_bert_config.json +4 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 4096,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": false,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": true,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
---
|
2 |
tags:
|
3 |
- mteb
|
|
|
|
|
4 |
model-index:
|
5 |
- name: Salesforce/SFR-Embedding-2_R
|
6 |
results:
|
@@ -2007,8 +2009,7 @@ print(scores.tolist())
|
|
2007 |
|
2008 |
### Sentence Transformers
|
2009 |
```python
|
2010 |
-
|
2011 |
-
from sentence_transformers import SentenceTransformer, util
|
2012 |
|
2013 |
model = SentenceTransformer("Salesforce/SFR-Embedding-2_R")
|
2014 |
|
@@ -2028,9 +2029,9 @@ passages = [
|
|
2028 |
]
|
2029 |
|
2030 |
embeddings = model.encode(queries + passages)
|
2031 |
-
scores = util.cos_sim(embeddings[:2], embeddings[2:]) * 100
|
2032 |
print(scores.tolist())
|
2033 |
-
# [[40.13203811645508, 25.032546997070312], [15.00684642791748, 39.937339782714844]]
|
2034 |
```
|
2035 |
|
2036 |
|
|
|
1 |
---
|
2 |
tags:
|
3 |
- mteb
|
4 |
+
- sentence-transformers
|
5 |
+
- transformers
|
6 |
model-index:
|
7 |
- name: Salesforce/SFR-Embedding-2_R
|
8 |
results:
|
|
|
2009 |
|
2010 |
### Sentence Transformers
|
2011 |
```python
|
2012 |
+
from sentence_transformers import SentenceTransformer
|
|
|
2013 |
|
2014 |
model = SentenceTransformer("Salesforce/SFR-Embedding-2_R")
|
2015 |
|
|
|
2029 |
]
|
2030 |
|
2031 |
embeddings = model.encode(queries + passages)
|
2032 |
+
scores = model.similarity(embeddings[:2], embeddings[2:]) * 100
|
2033 |
print(scores.tolist())
|
2034 |
+
# [[40.13203811645508, 25.032546997070312], [15.00684642791748, 39.937339782714844]]
|
2035 |
```
|
2036 |
|
2037 |
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.0.1",
|
4 |
+
"transformers": "4.41.2",
|
5 |
+
"pytorch": "2.3.0+cu121"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": "cosine"
|
10 |
+
}
|
modules.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
}
|
14 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 4096,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|