ldwang committed on
Commit 33cd644
1 Parent(s): 2616ef0
Files changed (1)
  1. README.md +241 -2693
README.md CHANGED
@@ -1,2609 +1,16 @@
 ---
 tags:
-- mteb
-- sentence-transfomres
 - transformers
-model-index:
-- name: bge-large-en
-  results:
-  - task:
-      type: Classification
-    dataset:
-      type: mteb/amazon_counterfactual
-      name: MTEB AmazonCounterfactualClassification (en)
-      config: en
-      split: test
-      revision: e8379541af4e31359cca9fbcf4b00f2671dba205
-    metrics:
-    - type: accuracy
-      value: 76.94029850746269
-    - type: ap
-      value: 40.00228964744091
-    - type: f1
-      value: 70.86088267934595
-  - task:
-      type: Classification
-    dataset:
-      type: mteb/amazon_polarity
-      name: MTEB AmazonPolarityClassification
-      config: default
-      split: test
-      revision: e2d317d38cd51312af73b3d32a06d1a08b442046
-    metrics:
-    - type: accuracy
-      value: 91.93745
-    - type: ap
-      value: 88.24758534667426
-    - type: f1
-      value: 91.91033034217591
-  - task:
-      type: Classification
-    dataset:
-      type: mteb/amazon_reviews_multi
-      name: MTEB AmazonReviewsClassification (en)
-      config: en
-      split: test
-      revision: 1399c76144fd37290681b995c656ef9b2e06e26d
-    metrics:
-    - type: accuracy
-      value: 46.158
-    - type: f1
-      value: 45.78935185074774
-  - task:
-      type: Retrieval
-    dataset:
-      type: arguana
-      name: MTEB ArguAna
-      config: default
-      split: test
-      revision: None
-    metrics:
-    - type: map_at_1
-      value: 39.972
-    - type: map_at_10
-      value: 54.874
-    - type: map_at_100
-      value: 55.53399999999999
-    - type: map_at_1000
-      value: 55.539
-    - type: map_at_3
-      value: 51.031000000000006
-    - type: map_at_5
-      value: 53.342999999999996
-    - type: mrr_at_1
-      value: 40.541
-    - type: mrr_at_10
-      value: 55.096000000000004
-    - type: mrr_at_100
-      value: 55.75599999999999
-    - type: mrr_at_1000
-      value: 55.761
-    - type: mrr_at_3
-      value: 51.221000000000004
-    - type: mrr_at_5
-      value: 53.568000000000005
-    - type: ndcg_at_1
-      value: 39.972
-    - type: ndcg_at_10
-      value: 62.456999999999994
-    - type: ndcg_at_100
-      value: 65.262
-    - type: ndcg_at_1000
-      value: 65.389
-    - type: ndcg_at_3
-      value: 54.673
-    - type: ndcg_at_5
-      value: 58.80499999999999
-    - type: precision_at_1
-      value: 39.972
-    - type: precision_at_10
-      value: 8.634
-    - type: precision_at_100
-      value: 0.9860000000000001
-    - type: precision_at_1000
-      value: 0.1
-    - type: precision_at_3
-      value: 21.740000000000002
-    - type: precision_at_5
-      value: 15.036
-    - type: recall_at_1
-      value: 39.972
-    - type: recall_at_10
-      value: 86.344
-    - type: recall_at_100
-      value: 98.578
-    - type: recall_at_1000
-      value: 99.57300000000001
-    - type: recall_at_3
-      value: 65.22
-    - type: recall_at_5
-      value: 75.178
-  - task:
-      type: Clustering
-    dataset:
-      type: mteb/arxiv-clustering-p2p
-      name: MTEB ArxivClusteringP2P
-      config: default
-      split: test
-      revision: a122ad7f3f0291bf49cc6f4d32aa80929df69d5d
-    metrics:
-    - type: v_measure
-      value: 48.94652870403906
-  - task:
-      type: Clustering
-    dataset:
-      type: mteb/arxiv-clustering-s2s
-      name: MTEB ArxivClusteringS2S
-      config: default
-      split: test
-      revision: f910caf1a6075f7329cdf8c1a6135696f37dbd53
-    metrics:
-    - type: v_measure
-      value: 43.17257160340209
-  - task:
-      type: Reranking
-    dataset:
-      type: mteb/askubuntudupquestions-reranking
-      name: MTEB AskUbuntuDupQuestions
-      config: default
-      split: test
-      revision: 2000358ca161889fa9c082cb41daa8dcfb161a54
-    metrics:
-    - type: map
-      value: 63.97867370559182
-    - type: mrr
-      value: 77.00820032537484
-  - task:
-      type: STS
-    dataset:
-      type: mteb/biosses-sts
-      name: MTEB BIOSSES
-      config: default
-      split: test
-      revision: d3fb88f8f02e40887cd149695127462bbcf29b4a
-    metrics:
-    - type: cos_sim_pearson
-      value: 80.00986015960616
-    - type: cos_sim_spearman
-      value: 80.36387933827882
-    - type: euclidean_pearson
-      value: 80.32305287257296
-    - type: euclidean_spearman
-      value: 82.0524720308763
-    - type: manhattan_pearson
-      value: 80.19847473906454
-    - type: manhattan_spearman
-      value: 81.87957652506985
-  - task:
-      type: Classification
-    dataset:
-      type: mteb/banking77
-      name: MTEB Banking77Classification
-      config: default
-      split: test
-      revision: 0fd18e25b25c072e09e0d92ab615fda904d66300
-    metrics:
-    - type: accuracy
-      value: 88.00000000000001
-    - type: f1
-      value: 87.99039027511853
-  - task:
-      type: Clustering
-    dataset:
-      type: mteb/biorxiv-clustering-p2p
-      name: MTEB BiorxivClusteringP2P
-      config: default
-      split: test
-      revision: 65b79d1d13f80053f67aca9498d9402c2d9f1f40
-    metrics:
-    - type: v_measure
-      value: 41.36932844640705
-  - task:
-      type: Clustering
-    dataset:
-      type: mteb/biorxiv-clustering-s2s
-      name: MTEB BiorxivClusteringS2S
-      config: default
-      split: test
-      revision: 258694dd0231531bc1fd9de6ceb52a0853c6d908
-    metrics:
-    - type: v_measure
-      value: 38.34983239611985
-  - task:
-      type: Retrieval
-    dataset:
-      type: BeIR/cqadupstack
-      name: MTEB CQADupstackAndroidRetrieval
-      config: default
-      split: test
-      revision: None
-    metrics:
-    - type: map_at_1
-      value: 32.257999999999996
-    - type: map_at_10
-      value: 42.937
-    - type: map_at_100
-      value: 44.406
-    - type: map_at_1000
-      value: 44.536
-    - type: map_at_3
-      value: 39.22
-    - type: map_at_5
-      value: 41.458
-    - type: mrr_at_1
-      value: 38.769999999999996
-    - type: mrr_at_10
-      value: 48.701
-    - type: mrr_at_100
-      value: 49.431000000000004
-    - type: mrr_at_1000
-      value: 49.476
-    - type: mrr_at_3
-      value: 45.875
-    - type: mrr_at_5
-      value: 47.67
-    - type: ndcg_at_1
-      value: 38.769999999999996
-    - type: ndcg_at_10
-      value: 49.35
-    - type: ndcg_at_100
-      value: 54.618
-    - type: ndcg_at_1000
-      value: 56.655
-    - type: ndcg_at_3
-      value: 43.826
-    - type: ndcg_at_5
-      value: 46.72
-    - type: precision_at_1
-      value: 38.769999999999996
-    - type: precision_at_10
-      value: 9.328
-    - type: precision_at_100
-      value: 1.484
-    - type: precision_at_1000
-      value: 0.196
-    - type: precision_at_3
-      value: 20.649
-    - type: precision_at_5
-      value: 15.25
-    - type: recall_at_1
-      value: 32.257999999999996
-    - type: recall_at_10
-      value: 61.849
-    - type: recall_at_100
-      value: 83.70400000000001
-    - type: recall_at_1000
-      value: 96.344
-    - type: recall_at_3
-      value: 46.037
-    - type: recall_at_5
-      value: 53.724000000000004
-  - task:
-      type: Retrieval
-    dataset:
-      type: BeIR/cqadupstack
-      name: MTEB CQADupstackEnglishRetrieval
-      config: default
-      split: test
-      revision: None
-    metrics:
-    - type: map_at_1
-      value: 32.979
-    - type: map_at_10
-      value: 43.376999999999995
-    - type: map_at_100
-      value: 44.667
-    - type: map_at_1000
-      value: 44.794
-    - type: map_at_3
-      value: 40.461999999999996
-    - type: map_at_5
-      value: 42.138
-    - type: mrr_at_1
-      value: 41.146
-    - type: mrr_at_10
-      value: 49.575
-    - type: mrr_at_100
-      value: 50.187000000000005
-    - type: mrr_at_1000
-      value: 50.231
-    - type: mrr_at_3
-      value: 47.601
-    - type: mrr_at_5
-      value: 48.786
-    - type: ndcg_at_1
-      value: 41.146
-    - type: ndcg_at_10
-      value: 48.957
-    - type: ndcg_at_100
-      value: 53.296
-    - type: ndcg_at_1000
-      value: 55.254000000000005
-    - type: ndcg_at_3
-      value: 45.235
-    - type: ndcg_at_5
-      value: 47.014
-    - type: precision_at_1
-      value: 41.146
-    - type: precision_at_10
-      value: 9.107999999999999
-    - type: precision_at_100
-      value: 1.481
-    - type: precision_at_1000
-      value: 0.193
-    - type: precision_at_3
-      value: 21.783
-    - type: precision_at_5
-      value: 15.274
-    - type: recall_at_1
-      value: 32.979
-    - type: recall_at_10
-      value: 58.167
-    - type: recall_at_100
-      value: 76.374
-    - type: recall_at_1000
-      value: 88.836
-    - type: recall_at_3
-      value: 46.838
-    - type: recall_at_5
-      value: 52.006
-  - task:
-      type: Retrieval
-    dataset:
-      type: BeIR/cqadupstack
-      name: MTEB CQADupstackGamingRetrieval
-      config: default
-      split: test
-      revision: None
-    metrics:
-    - type: map_at_1
-      value: 40.326
-    - type: map_at_10
-      value: 53.468
-    - type: map_at_100
-      value: 54.454
-    - type: map_at_1000
-      value: 54.508
-    - type: map_at_3
-      value: 50.12799999999999
-    - type: map_at_5
-      value: 51.991
-    - type: mrr_at_1
-      value: 46.394999999999996
-    - type: mrr_at_10
-      value: 57.016999999999996
-    - type: mrr_at_100
-      value: 57.67099999999999
-    - type: mrr_at_1000
-      value: 57.699999999999996
-    - type: mrr_at_3
-      value: 54.65
-    - type: mrr_at_5
-      value: 56.101
-    - type: ndcg_at_1
-      value: 46.394999999999996
-    - type: ndcg_at_10
-      value: 59.507
-    - type: ndcg_at_100
-      value: 63.31099999999999
-    - type: ndcg_at_1000
-      value: 64.388
-    - type: ndcg_at_3
-      value: 54.04600000000001
-    - type: ndcg_at_5
-      value: 56.723
-    - type: precision_at_1
-      value: 46.394999999999996
-    - type: precision_at_10
-      value: 9.567
-    - type: precision_at_100
-      value: 1.234
-    - type: precision_at_1000
-      value: 0.13699999999999998
-    - type: precision_at_3
-      value: 24.117
-    - type: precision_at_5
-      value: 16.426
-    - type: recall_at_1
-      value: 40.326
-    - type: recall_at_10
-      value: 73.763
-    - type: recall_at_100
-      value: 89.927
-    - type: recall_at_1000
-      value: 97.509
-    - type: recall_at_3
-      value: 59.34
-    - type: recall_at_5
-      value: 65.915
-  - task:
-      type: Retrieval
-    dataset:
-      type: BeIR/cqadupstack
-      name: MTEB CQADupstackGisRetrieval
-      config: default
-      split: test
-      revision: None
-    metrics:
-    - type: map_at_1
-      value: 26.661
-    - type: map_at_10
-      value: 35.522
-    - type: map_at_100
-      value: 36.619
-    - type: map_at_1000
-      value: 36.693999999999996
-    - type: map_at_3
-      value: 33.154
-    - type: map_at_5
-      value: 34.353
-    - type: mrr_at_1
-      value: 28.362
-    - type: mrr_at_10
-      value: 37.403999999999996
-    - type: mrr_at_100
-      value: 38.374
-    - type: mrr_at_1000
-      value: 38.428000000000004
-    - type: mrr_at_3
-      value: 35.235
-    - type: mrr_at_5
-      value: 36.269
-    - type: ndcg_at_1
-      value: 28.362
-    - type: ndcg_at_10
-      value: 40.431
-    - type: ndcg_at_100
-      value: 45.745999999999995
-    - type: ndcg_at_1000
-      value: 47.493
-    - type: ndcg_at_3
-      value: 35.733
-    - type: ndcg_at_5
-      value: 37.722
-    - type: precision_at_1
-      value: 28.362
-    - type: precision_at_10
-      value: 6.101999999999999
-    - type: precision_at_100
-      value: 0.922
-    - type: precision_at_1000
-      value: 0.11100000000000002
-    - type: precision_at_3
-      value: 15.140999999999998
-    - type: precision_at_5
-      value: 10.305
-    - type: recall_at_1
-      value: 26.661
-    - type: recall_at_10
-      value: 53.675
-    - type: recall_at_100
-      value: 77.891
-    - type: recall_at_1000
-      value: 90.72
-    - type: recall_at_3
-      value: 40.751
-    - type: recall_at_5
-      value: 45.517
-  - task:
-      type: Retrieval
-    dataset:
-      type: BeIR/cqadupstack
-      name: MTEB CQADupstackMathematicaRetrieval
-      config: default
-      split: test
-      revision: None
-    metrics:
-    - type: map_at_1
-      value: 18.886
-    - type: map_at_10
-      value: 27.288
-    - type: map_at_100
-      value: 28.327999999999996
-    - type: map_at_1000
-      value: 28.438999999999997
-    - type: map_at_3
-      value: 24.453
-    - type: map_at_5
-      value: 25.959
-    - type: mrr_at_1
-      value: 23.134
-    - type: mrr_at_10
-      value: 32.004
-    - type: mrr_at_100
-      value: 32.789
-    - type: mrr_at_1000
-      value: 32.857
-    - type: mrr_at_3
-      value: 29.084
-    - type: mrr_at_5
-      value: 30.614
-    - type: ndcg_at_1
-      value: 23.134
-    - type: ndcg_at_10
-      value: 32.852
-    - type: ndcg_at_100
-      value: 37.972
-    - type: ndcg_at_1000
-      value: 40.656
-    - type: ndcg_at_3
-      value: 27.435
-    - type: ndcg_at_5
-      value: 29.823
-    - type: precision_at_1
-      value: 23.134
-    - type: precision_at_10
-      value: 6.032
-    - type: precision_at_100
-      value: 0.9950000000000001
-    - type: precision_at_1000
-      value: 0.136
-    - type: precision_at_3
-      value: 13.017999999999999
-    - type: precision_at_5
-      value: 9.501999999999999
-    - type: recall_at_1
-      value: 18.886
-    - type: recall_at_10
-      value: 45.34
-    - type: recall_at_100
-      value: 67.947
-    - type: recall_at_1000
-      value: 86.924
-    - type: recall_at_3
-      value: 30.535
-    - type: recall_at_5
-      value: 36.451
-  - task:
-      type: Retrieval
-    dataset:
-      type: BeIR/cqadupstack
-      name: MTEB CQADupstackPhysicsRetrieval
-      config: default
-      split: test
-      revision: None
-    metrics:
-    - type: map_at_1
-      value: 28.994999999999997
-    - type: map_at_10
-      value: 40.04
-    - type: map_at_100
-      value: 41.435
-    - type: map_at_1000
-      value: 41.537
-    - type: map_at_3
-      value: 37.091
-    - type: map_at_5
-      value: 38.802
-    - type: mrr_at_1
-      value: 35.034
-    - type: mrr_at_10
-      value: 45.411
-    - type: mrr_at_100
-      value: 46.226
-    - type: mrr_at_1000
-      value: 46.27
-    - type: mrr_at_3
-      value: 43.086
-    - type: mrr_at_5
-      value: 44.452999999999996
-    - type: ndcg_at_1
-      value: 35.034
-    - type: ndcg_at_10
-      value: 46.076
-    - type: ndcg_at_100
-      value: 51.483000000000004
-    - type: ndcg_at_1000
-      value: 53.433
-    - type: ndcg_at_3
-      value: 41.304
-    - type: ndcg_at_5
-      value: 43.641999999999996
-    - type: precision_at_1
-      value: 35.034
-    - type: precision_at_10
-      value: 8.258000000000001
-    - type: precision_at_100
-      value: 1.268
-    - type: precision_at_1000
-      value: 0.161
-    - type: precision_at_3
-      value: 19.57
-    - type: precision_at_5
-      value: 13.782
-    - type: recall_at_1
-      value: 28.994999999999997
-    - type: recall_at_10
-      value: 58.538000000000004
-    - type: recall_at_100
-      value: 80.72399999999999
-    - type: recall_at_1000
-      value: 93.462
-    - type: recall_at_3
-      value: 45.199
-    - type: recall_at_5
-      value: 51.237
-  - task:
-      type: Retrieval
-    dataset:
-      type: BeIR/cqadupstack
-      name: MTEB CQADupstackProgrammersRetrieval
-      config: default
-      split: test
-      revision: None
-    metrics:
-    - type: map_at_1
-      value: 24.795
-    - type: map_at_10
-      value: 34.935
-    - type: map_at_100
-      value: 36.306
-    - type: map_at_1000
-      value: 36.417
-    - type: map_at_3
-      value: 31.831
-    - type: map_at_5
-      value: 33.626
-    - type: mrr_at_1
-      value: 30.479
-    - type: mrr_at_10
-      value: 40.225
-    - type: mrr_at_100
-      value: 41.055
-    - type: mrr_at_1000
-      value: 41.114
-    - type: mrr_at_3
-      value: 37.538
-    - type: mrr_at_5
-      value: 39.073
-    - type: ndcg_at_1
-      value: 30.479
-    - type: ndcg_at_10
-      value: 40.949999999999996
-    - type: ndcg_at_100
-      value: 46.525
-    - type: ndcg_at_1000
-      value: 48.892
-    - type: ndcg_at_3
-      value: 35.79
-    - type: ndcg_at_5
-      value: 38.237
-    - type: precision_at_1
-      value: 30.479
-    - type: precision_at_10
-      value: 7.6259999999999994
-    - type: precision_at_100
-      value: 1.203
-    - type: precision_at_1000
-      value: 0.157
-    - type: precision_at_3
-      value: 17.199
-    - type: precision_at_5
-      value: 12.466000000000001
-    - type: recall_at_1
-      value: 24.795
-    - type: recall_at_10
-      value: 53.421
-    - type: recall_at_100
-      value: 77.189
-    - type: recall_at_1000
-      value: 93.407
-    - type: recall_at_3
-      value: 39.051
-    - type: recall_at_5
-      value: 45.462
-  - task:
-      type: Retrieval
-    dataset:
-      type: BeIR/cqadupstack
-      name: MTEB CQADupstackRetrieval
-      config: default
-      split: test
-      revision: None
-    metrics:
-    - type: map_at_1
-      value: 26.853499999999997
-    - type: map_at_10
-      value: 36.20433333333333
-    - type: map_at_100
-      value: 37.40391666666667
-    - type: map_at_1000
-      value: 37.515
-    - type: map_at_3
-      value: 33.39975
-    - type: map_at_5
-      value: 34.9665
-    - type: mrr_at_1
-      value: 31.62666666666667
-    - type: mrr_at_10
-      value: 40.436749999999996
-    - type: mrr_at_100
-      value: 41.260333333333335
-    - type: mrr_at_1000
-      value: 41.31525
-    - type: mrr_at_3
-      value: 38.06733333333332
-    - type: mrr_at_5
-      value: 39.41541666666667
-    - type: ndcg_at_1
-      value: 31.62666666666667
-    - type: ndcg_at_10
-      value: 41.63341666666667
-    - type: ndcg_at_100
-      value: 46.704166666666666
-    - type: ndcg_at_1000
-      value: 48.88483333333335
-    - type: ndcg_at_3
-      value: 36.896
-    - type: ndcg_at_5
-      value: 39.11891666666667
-    - type: precision_at_1
-      value: 31.62666666666667
-    - type: precision_at_10
-      value: 7.241083333333333
-    - type: precision_at_100
-      value: 1.1488333333333334
-    - type: precision_at_1000
-      value: 0.15250000000000002
-    - type: precision_at_3
-      value: 16.908333333333335
-    - type: precision_at_5
-      value: 11.942833333333333
-    - type: recall_at_1
-      value: 26.853499999999997
-    - type: recall_at_10
-      value: 53.461333333333336
-    - type: recall_at_100
-      value: 75.63633333333333
-    - type: recall_at_1000
-      value: 90.67016666666666
-    - type: recall_at_3
-      value: 40.24241666666667
-    - type: recall_at_5
-      value: 45.98608333333333
-  - task:
-      type: Retrieval
-    dataset:
-      type: BeIR/cqadupstack
-      name: MTEB CQADupstackStatsRetrieval
-      config: default
-      split: test
-      revision: None
-    metrics:
-    - type: map_at_1
-      value: 25.241999999999997
-    - type: map_at_10
-      value: 31.863999999999997
-    - type: map_at_100
-      value: 32.835
-    - type: map_at_1000
-      value: 32.928000000000004
-    - type: map_at_3
-      value: 29.694
-    - type: map_at_5
-      value: 30.978
-    - type: mrr_at_1
-      value: 28.374
-    - type: mrr_at_10
-      value: 34.814
-    - type: mrr_at_100
-      value: 35.596
-    - type: mrr_at_1000
-      value: 35.666
-    - type: mrr_at_3
-      value: 32.745000000000005
-    - type: mrr_at_5
-      value: 34.049
-    - type: ndcg_at_1
-      value: 28.374
-    - type: ndcg_at_10
-      value: 35.969
-    - type: ndcg_at_100
-      value: 40.708
-    - type: ndcg_at_1000
-      value: 43.08
-    - type: ndcg_at_3
-      value: 31.968999999999998
-    - type: ndcg_at_5
-      value: 34.069
-    - type: precision_at_1
-      value: 28.374
-    - type: precision_at_10
-      value: 5.583
-    - type: precision_at_100
-      value: 0.8630000000000001
-    - type: precision_at_1000
-      value: 0.11299999999999999
-    - type: precision_at_3
-      value: 13.547999999999998
-    - type: precision_at_5
-      value: 9.447999999999999
-    - type: recall_at_1
-      value: 25.241999999999997
-    - type: recall_at_10
-      value: 45.711
-    - type: recall_at_100
-      value: 67.482
-    - type: recall_at_1000
-      value: 85.13300000000001
-    - type: recall_at_3
-      value: 34.622
-    - type: recall_at_5
-      value: 40.043
-  - task:
-      type: Retrieval
-    dataset:
-      type: BeIR/cqadupstack
-      name: MTEB CQADupstackTexRetrieval
-      config: default
-      split: test
-      revision: None
-    metrics:
-    - type: map_at_1
-      value: 17.488999999999997
-    - type: map_at_10
-      value: 25.142999999999997
-    - type: map_at_100
-      value: 26.244
-    - type: map_at_1000
-      value: 26.363999999999997
-    - type: map_at_3
-      value: 22.654
-    - type: map_at_5
-      value: 24.017
-    - type: mrr_at_1
-      value: 21.198
-    - type: mrr_at_10
-      value: 28.903000000000002
-    - type: mrr_at_100
-      value: 29.860999999999997
-    - type: mrr_at_1000
-      value: 29.934
-    - type: mrr_at_3
-      value: 26.634999999999998
-    - type: mrr_at_5
-      value: 27.903
-    - type: ndcg_at_1
-      value: 21.198
-    - type: ndcg_at_10
-      value: 29.982999999999997
-    - type: ndcg_at_100
-      value: 35.275
-    - type: ndcg_at_1000
-      value: 38.074000000000005
-    - type: ndcg_at_3
-      value: 25.502999999999997
-    - type: ndcg_at_5
-      value: 27.557
-    - type: precision_at_1
-      value: 21.198
-    - type: precision_at_10
-      value: 5.502
-    - type: precision_at_100
-      value: 0.942
-    - type: precision_at_1000
-      value: 0.136
-    - type: precision_at_3
-      value: 12.044
-    - type: precision_at_5
-      value: 8.782
-    - type: recall_at_1
-      value: 17.488999999999997
-    - type: recall_at_10
-      value: 40.821000000000005
-    - type: recall_at_100
-      value: 64.567
-    - type: recall_at_1000
-      value: 84.452
-    - type: recall_at_3
-      value: 28.351
-    - type: recall_at_5
-      value: 33.645
-  - task:
-      type: Retrieval
-    dataset:
-      type: BeIR/cqadupstack
-      name: MTEB CQADupstackUnixRetrieval
-      config: default
-      split: test
-      revision: None
-    metrics:
-    - type: map_at_1
-      value: 27.066000000000003
-    - type: map_at_10
-      value: 36.134
-    - type: map_at_100
-      value: 37.285000000000004
-    - type: map_at_1000
-      value: 37.389
-    - type: map_at_3
-      value: 33.522999999999996
-    - type: map_at_5
-      value: 34.905
-    - type: mrr_at_1
-      value: 31.436999999999998
-    - type: mrr_at_10
-      value: 40.225
-    - type: mrr_at_100
-      value: 41.079
-    - type: mrr_at_1000
-      value: 41.138000000000005
-    - type: mrr_at_3
-      value: 38.074999999999996
-    - type: mrr_at_5
-      value: 39.190000000000005
-    - type: ndcg_at_1
-      value: 31.436999999999998
-    - type: ndcg_at_10
-      value: 41.494
-    - type: ndcg_at_100
-      value: 46.678999999999995
-    - type: ndcg_at_1000
-      value: 48.964
-    - type: ndcg_at_3
-      value: 36.828
-    - type: ndcg_at_5
-      value: 38.789
-    - type: precision_at_1
-      value: 31.436999999999998
-    - type: precision_at_10
-      value: 6.931
-    - type: precision_at_100
-      value: 1.072
-    - type: precision_at_1000
-      value: 0.13799999999999998
-    - type: precision_at_3
-      value: 16.729
-    - type: precision_at_5
-      value: 11.567
-    - type: recall_at_1
-      value: 27.066000000000003
-    - type: recall_at_10
-      value: 53.705000000000005
-    - type: recall_at_100
-      value: 75.968
-    - type: recall_at_1000
-      value: 91.937
-    - type: recall_at_3
-      value: 40.865
-    - type: recall_at_5
-      value: 45.739999999999995
-  - task:
-      type: Retrieval
-    dataset:
-      type: BeIR/cqadupstack
-      name: MTEB CQADupstackWebmastersRetrieval
-      config: default
-      split: test
-      revision: None
-    metrics:
-    - type: map_at_1
-      value: 24.979000000000003
-    - type: map_at_10
-      value: 32.799
-    - type: map_at_100
-      value: 34.508
-    - type: map_at_1000
-      value: 34.719
-    - type: map_at_3
-      value: 29.947000000000003
-    - type: map_at_5
-      value: 31.584
-    - type: mrr_at_1
-      value: 30.237000000000002
-    - type: mrr_at_10
-      value: 37.651
-    - type: mrr_at_100
-      value: 38.805
-    - type: mrr_at_1000
-      value: 38.851
-    - type: mrr_at_3
-      value: 35.046
-    - type: mrr_at_5
-      value: 36.548
-    - type: ndcg_at_1
-      value: 30.237000000000002
-    - type: ndcg_at_10
-      value: 38.356
-    - type: ndcg_at_100
-      value: 44.906
-    - type: ndcg_at_1000
-      value: 47.299
-    - type: ndcg_at_3
-      value: 33.717999999999996
-    - type: ndcg_at_5
-      value: 35.946
-    - type: precision_at_1
-      value: 30.237000000000002
-    - type: precision_at_10
-      value: 7.292
-    - type: precision_at_100
-      value: 1.496
-    - type: precision_at_1000
-      value: 0.23600000000000002
-    - type: precision_at_3
-      value: 15.547
-    - type: precision_at_5
-      value: 11.344
-    - type: recall_at_1
-      value: 24.979000000000003
-    - type: recall_at_10
-      value: 48.624
-    - type: recall_at_100
-      value: 77.932
-    - type: recall_at_1000
-      value: 92.66499999999999
-    - type: recall_at_3
-      value: 35.217
-    - type: recall_at_5
-      value: 41.394
-  - task:
-      type: Retrieval
-    dataset:
-      type: BeIR/cqadupstack
-      name: MTEB CQADupstackWordpressRetrieval
-      config: default
-      split: test
-      revision: None
-    metrics:
-    - type: map_at_1
-      value: 22.566
-    - type: map_at_10
-      value: 30.945
-    - type: map_at_100
-      value: 31.759999999999998
-    - type: map_at_1000
-      value: 31.855
-    - type: map_at_3
-      value: 28.64
-    - type: map_at_5
-      value: 29.787000000000003
-    - type: mrr_at_1
-      value: 24.954
-    - type: mrr_at_10
-      value: 33.311
-    - type: mrr_at_100
-      value: 34.050000000000004
-    - type: mrr_at_1000
-      value: 34.117999999999995
-    - type: mrr_at_3
-      value: 31.238
-    - type: mrr_at_5
-      value: 32.329
-    - type: ndcg_at_1
-      value: 24.954
-    - type: ndcg_at_10
-      value: 35.676
-    - type: ndcg_at_100
-      value: 39.931
-    - type: ndcg_at_1000
-      value: 42.43
-    - type: ndcg_at_3
-      value: 31.365
-    - type: ndcg_at_5
-      value: 33.184999999999995
-    - type: precision_at_1
-      value: 24.954
-    - type: precision_at_10
-      value: 5.564
-    - type: precision_at_100
-      value: 0.826
-    - type: precision_at_1000
-      value: 0.116
-    - type: precision_at_3
-      value: 13.555
-    - type: precision_at_5
-      value: 9.168
-    - type: recall_at_1
-      value: 22.566
-    - type: recall_at_10
-      value: 47.922
-    - type: recall_at_100
-      value: 67.931
-    - type: recall_at_1000
-      value: 86.653
-    - type: recall_at_3
-      value: 36.103
-    - type: recall_at_5
-      value: 40.699000000000005
-  - task:
-      type: Retrieval
-    dataset:
-      type: climate-fever
-      name: MTEB ClimateFEVER
-      config: default
-      split: test
-      revision: None
-    metrics:
-    - type: map_at_1
-      value: 16.950000000000003
-    - type: map_at_10
-      value: 28.612
-    - type: map_at_100
-      value: 30.476999999999997
-    - type: map_at_1000
-      value: 30.674
-    - type: map_at_3
-      value: 24.262
-    - type: map_at_5
-      value: 26.554
-    - type: mrr_at_1
-      value: 38.241
-    - type: mrr_at_10
-      value: 50.43
-    - type: mrr_at_100
-      value: 51.059
-    - type: mrr_at_1000
-      value: 51.090999999999994
-    - type: mrr_at_3
-      value: 47.514
-    - type: mrr_at_5
-      value: 49.246
-    - type: ndcg_at_1
-      value: 38.241
-    - type: ndcg_at_10
-      value: 38.218
-    - type: ndcg_at_100
-      value: 45.003
-    - type: ndcg_at_1000
-      value: 48.269
-    - type: ndcg_at_3
-      value: 32.568000000000005
-    - type: ndcg_at_5
-      value: 34.400999999999996
-    - type: precision_at_1
-      value: 38.241
-    - type: precision_at_10
-      value: 11.674
-    - type: precision_at_100
-      value: 1.913
-    - type: precision_at_1000
-      value: 0.252
-    - type: precision_at_3
-      value: 24.387
-    - type: precision_at_5
-      value: 18.163
-    - type: recall_at_1
-      value: 16.950000000000003
-    - type: recall_at_10
-      value: 43.769000000000005
-    - type: recall_at_100
-      value: 66.875
-    - type: recall_at_1000
-      value: 84.92699999999999
-    - type: recall_at_3
-      value: 29.353
-    - type: recall_at_5
-      value: 35.467
-  - task:
-      type: Retrieval
-    dataset:
-      type: dbpedia-entity
-      name: MTEB DBPedia
-      config: default
-      split: test
-      revision: None
-    metrics:
-    - type: map_at_1
-      value: 9.276
-    - type: map_at_10
-      value: 20.848
-    - type: map_at_100
-      value: 29.804000000000002
-    - type: map_at_1000
-      value: 31.398
-    - type: map_at_3
-      value: 14.886
-    - type: map_at_5
-      value: 17.516000000000002
-    - type: mrr_at_1
-      value: 71
-    - type: mrr_at_10
-      value: 78.724
-    - type: mrr_at_100
-      value: 78.976
-    - type: mrr_at_1000
-      value: 78.986
-    - type: mrr_at_3
-      value: 77.333
-    - type: mrr_at_5
-      value: 78.021
-    - type: ndcg_at_1
-      value: 57.875
-    - type: ndcg_at_10
-      value: 43.855
-    - type: ndcg_at_100
-      value: 48.99
-    - type: ndcg_at_1000
-      value: 56.141
-    - type: ndcg_at_3
-      value: 48.914
-    - type: ndcg_at_5
-      value: 45.961
-    - type: precision_at_1
-      value: 71
-    - type: precision_at_10
-      value: 34.575
-    - type: precision_at_100
-      value: 11.182
-    - type: precision_at_1000
-      value: 2.044
-    - type: precision_at_3
-      value: 52.5
-    - type: precision_at_5
-      value: 44.2
-    - type: recall_at_1
-      value: 9.276
-    - type: recall_at_10
-      value: 26.501
-    - type: recall_at_100
-      value: 55.72899999999999
-    - type: recall_at_1000
-      value: 78.532
-    - type: recall_at_3
-      value: 16.365
-    - type: recall_at_5
-      value: 20.154
-  - task:
-      type: Classification
-    dataset:
-      type: mteb/emotion
-      name: MTEB EmotionClassification
-      config: default
-      split: test
-      revision: 4f58c6b202a23cf9a4da393831edf4f9183cad37
-    metrics:
-    - type: accuracy
-      value: 52.71
-    - type: f1
-      value: 47.74801556489574
-  - task:
-      type: Retrieval
-    dataset:
-      type: fever
-      name: MTEB FEVER
-      config: default
-      split: test
-      revision: None
-    metrics:
-    - type: map_at_1
-      value: 73.405
-    - type: map_at_10
-      value: 82.822
-    - type: map_at_100
-      value: 83.042
-    - type: map_at_1000
-      value: 83.055
-    - type: map_at_3
-      value: 81.65299999999999
-    - type: map_at_5
-      value: 82.431
-    - type: mrr_at_1
-      value: 79.178
-    - type: mrr_at_10
-      value: 87.02
-    - type: mrr_at_100
-      value: 87.095
-    - type: mrr_at_1000
-      value: 87.09700000000001
-    - type: mrr_at_3
-      value: 86.309
-    - type: mrr_at_5
-      value: 86.824
-    - type: ndcg_at_1
-      value: 79.178
-    - type: ndcg_at_10
-      value: 86.72
-    - type: ndcg_at_100
-      value: 87.457
-    - type: ndcg_at_1000
-      value: 87.691
-    - type: ndcg_at_3
-      value: 84.974
-    - type: ndcg_at_5
-      value: 86.032
-    - type: precision_at_1
-      value: 79.178
-    - type: precision_at_10
-      value: 10.548
-    - type: precision_at_100
-      value: 1.113
-    - type: precision_at_1000
-      value: 0.11499999999999999
-    - type: precision_at_3
-      value: 32.848
-    - type: precision_at_5
-      value: 20.45
-    - type: recall_at_1
-      value: 73.405
-    - type: recall_at_10
-      value: 94.39699999999999
-    - type: recall_at_100
-      value: 97.219
-    - type: recall_at_1000
-      value: 98.675
-    - type: recall_at_3
-      value: 89.679
-    - type: recall_at_5
-      value: 92.392
-  - task:
-      type: Retrieval
-    dataset:
-      type: fiqa
-      name: MTEB FiQA2018
-      config: default
-      split: test
-      revision: None
-    metrics:
-    - type: map_at_1
-      value: 22.651
-    - type: map_at_10
-      value: 36.886
-    - type: map_at_100
-      value: 38.811
-    - type: map_at_1000
-      value: 38.981
-    - type: map_at_3
-      value: 32.538
-    - type: map_at_5
-      value: 34.763
-    - type: mrr_at_1
-      value: 44.444
-    - type: mrr_at_10
-      value: 53.168000000000006
-    - type: mrr_at_100
-      value: 53.839000000000006
-    - type: mrr_at_1000
-      value: 53.869
-    - type: mrr_at_3
-      value: 50.54
-    - type: mrr_at_5
-      value: 52.068000000000005
-    - type: ndcg_at_1
-      value: 44.444
-    - type: ndcg_at_10
-      value: 44.994
-    - type: ndcg_at_100
-      value: 51.599
-    - type: ndcg_at_1000
-      value: 54.339999999999996
-    - type: ndcg_at_3
-      value: 41.372
-    - type: ndcg_at_5
-      value: 42.149
-    - type: precision_at_1
-      value: 44.444
-    - type: precision_at_10
-      value: 12.407
-    - type: precision_at_100
-      value: 1.9269999999999998
-    - type: precision_at_1000
-      value: 0.242
-    - type: precision_at_3
-      value: 27.726
-    - type: precision_at_5
-      value: 19.814999999999998
-    - type: recall_at_1
-      value: 22.651
-    - type: recall_at_10
-      value: 52.075
-    - type: recall_at_100
-      value: 76.51400000000001
-    - type: recall_at_1000
-      value: 92.852
-    - type: recall_at_3
-      value: 37.236000000000004
-    - type: recall_at_5
-      value: 43.175999999999995
-  - task:
-      type: Retrieval
-    dataset:
-      type: hotpotqa
-      name: MTEB HotpotQA
-      config: default
-      split: test
-      revision: None
-    metrics:
-    - type: map_at_1
-      value: 40.777
-    - type: map_at_10
-      value: 66.79899999999999
-    - type: map_at_100
-      value: 67.65299999999999
-    - type: map_at_1000
-      value: 67.706
-    - type: map_at_3
-      value: 63.352
-    - type: map_at_5
-      value: 65.52900000000001
-    - type: mrr_at_1
-      value: 81.553
-    - type: mrr_at_10
-      value: 86.983
-    - type: mrr_at_100
-      value: 87.132
-    - type: mrr_at_1000
-      value: 87.136
-    - type: mrr_at_3
-      value: 86.156
-    - type: mrr_at_5
-      value: 86.726
-    - type: ndcg_at_1
-      value: 81.553
-    - type: ndcg_at_10
-      value: 74.64
-    - type: ndcg_at_100
-      value: 77.459
-    - type: ndcg_at_1000
-      value: 78.43
-    - type: ndcg_at_3
-      value: 69.878
-    - type: ndcg_at_5
-      value: 72.59400000000001
-    - type: precision_at_1
-      value: 81.553
-    - type: precision_at_10
-      value: 15.654000000000002
-    - type: precision_at_100
-      value: 1.783
-    - type: precision_at_1000
-      value: 0.191
-    - type: precision_at_3
-      value: 45.199
-    - type: precision_at_5
-      value: 29.267
-    - type: recall_at_1
-      value: 40.777
-    - type: recall_at_10
-      value: 78.271
-    - type: recall_at_100
-      value: 89.129
-    - type: recall_at_1000
-      value: 95.49
-    - type: recall_at_3
-      value: 67.79899999999999
-    - type: recall_at_5
-      value: 73.167
-  - task:
-      type: Classification
-    dataset:
-      type: mteb/imdb
-      name: MTEB ImdbClassification
-      config: default
-      split: test
-      revision: 3d86128a09e091d6018b6d26cad27f2739fc2db7
-    metrics:
-    - type: accuracy
-      value: 93.5064
-    - type: ap
-      value: 90.25495114444111
-    - type: f1
-      value: 93.5012434973381
-  - task:
-      type: Retrieval
-    dataset:
-      type: msmarco
-      name: MTEB MSMARCO
-      config: default
-      split: dev
-      revision: None
-    metrics:
-    - type: map_at_1
-      value: 23.301
-    - type: map_at_10
-      value: 35.657
-    - type: map_at_100
-      value: 36.797000000000004
-    - type: map_at_1000
-      value: 36.844
-    - type: map_at_3
-      value: 31.743
-    - type: map_at_5
-      value: 34.003
-    - type: mrr_at_1
-      value: 23.854
-    - type: mrr_at_10
-      value: 36.242999999999995
-    - type: mrr_at_100
-      value: 37.32
-    - type: mrr_at_1000
-      value: 37.361
-    - type: mrr_at_3
-      value: 32.4
-    - type: mrr_at_5
-      value: 34.634
-    - type: ndcg_at_1
-      value: 23.868000000000002
-    - type: ndcg_at_10
-      value: 42.589
-    - type: ndcg_at_100
-      value: 48.031
-    - type: ndcg_at_1000
-      value: 49.189
-    - type: ndcg_at_3
-      value: 34.649
-    - type: ndcg_at_5
-      value: 38.676
-    - type: precision_at_1
-      value: 23.868000000000002
-    - type: precision_at_10
-      value: 6.6850000000000005
-    - type: precision_at_100
-      value: 0.9400000000000001
-    - type: precision_at_1000
-      value: 0.104
-    - type: precision_at_3
-      value: 14.651
-    - type: precision_at_5
-      value: 10.834000000000001
-    - type: recall_at_1
-      value: 23.301
-    - type: recall_at_10
-      value: 63.88700000000001
-    - type: recall_at_100
-      value: 88.947
-    - type: recall_at_1000
-      value: 97.783
-    - type: recall_at_3
-      value: 42.393
-    - type: recall_at_5
-      value: 52.036
-  - task:
-      type: Classification
-    dataset:
-      type: mteb/mtop_domain
-      name: MTEB MTOPDomainClassification (en)
-      config: en
-      split: test
-      revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf
-    metrics:
-    - type: accuracy
-      value: 94.64888280893753
-    - type: f1
-      value: 94.41310774203512
-  - task:
-      type: Classification
-    dataset:
-      type: mteb/mtop_intent
-      name: MTEB MTOPIntentClassification (en)
-      config: en
-      split: test
-      revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba
-    metrics:
-    - type: accuracy
-      value: 79.72184222526221
-    - type: f1
-      value: 61.522034067350106
-  - task:
-      type: Classification
-    dataset:
-      type: mteb/amazon_massive_intent
-      name: MTEB MassiveIntentClassification (en)
-      config: en
-      split: test
-      revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7
-    metrics:
-    - type: accuracy
-      value: 79.60659045057163
-    - type: f1
-      value: 77.268649687049
-  - task:
-      type: Classification
-    dataset:
-      type: mteb/amazon_massive_scenario
-      name: MTEB MassiveScenarioClassification (en)
-      config: en
-      split: test
-      revision: 7d571f92784cd94a019292a1f45445077d0ef634
-    metrics:
-    - type: accuracy
-      value: 81.83254875588432
-    - type: f1
-      value: 81.61520635919082
-  - task:
-      type: Clustering
-    dataset:
-      type: mteb/medrxiv-clustering-p2p
-      name: MTEB MedrxivClusteringP2P
-      config: default
-      split: test
-      revision: e7a26af6f3ae46b30dde8737f02c07b1505bcc73
-    metrics:
-    - type: v_measure
-      value: 36.31529875009507
-  - task:
-      type: Clustering
-    dataset:
-      type: mteb/medrxiv-clustering-s2s
-      name: MTEB MedrxivClusteringS2S
-      config: default
-      split: test
-      revision: 35191c8c0dca72d8ff3efcd72aa802307d469663
-    metrics:
-    - type: v_measure
-      value: 31.734233714415073
-  - task:
-      type: Reranking
-    dataset:
-      type: mteb/mind_small
-      name: MTEB MindSmallReranking
-      config: default
-      split: test
-      revision: 3bdac13927fdc888b903db93b2ffdbd90b295a69
-    metrics:
-    - type: map
-      value: 30.994501713009452
-    - type: mrr
-      value: 32.13512850703073
-  - task:
-      type: Retrieval
-    dataset:
-      type: nfcorpus
-      name: MTEB NFCorpus
-      config: default
-      split: test
-      revision: None
-    metrics:
-    - type: map_at_1
-      value: 6.603000000000001
-    - type: map_at_10
-      value: 13.767999999999999
-    - type: map_at_100
-      value: 17.197000000000003
-    - type: map_at_1000
-      value: 18.615000000000002
-    - type: map_at_3
-      value: 10.567
-    - type: map_at_5
-      value: 12.078999999999999
-    - type: mrr_at_1
-      value: 44.891999999999996
-    - type: mrr_at_10
-      value: 53.75299999999999
-    - type: mrr_at_100
-      value: 54.35
-    - type: mrr_at_1000
-      value: 54.388000000000005
-    - type: mrr_at_3
-      value: 51.495999999999995
-    - type: mrr_at_5
-      value: 52.688
-    - type: ndcg_at_1
-      value: 43.189
-    - type: ndcg_at_10
-      value: 34.567
-    - type: ndcg_at_100
-      value: 32.273
-    - type: ndcg_at_1000
-      value: 41.321999999999996
-    - type: ndcg_at_3
-      value: 40.171
-    - type: ndcg_at_5
-      value: 37.502
-    - type: precision_at_1
-      value: 44.582
-    - type: precision_at_10
-      value: 25.139
-    - type: precision_at_100
-      value: 7.739999999999999
-    - type: precision_at_1000
-      value: 2.054
-    - type: precision_at_3
-      value: 37.152
-    - type: precision_at_5
-      value: 31.826999999999998
-    - type: recall_at_1
-      value: 6.603000000000001
-    - type: recall_at_10
-      value: 17.023
-    - type: recall_at_100
-      value: 32.914
-    - type: recall_at_1000
-      value: 64.44800000000001
-    - type: recall_at_3
-      value: 11.457
-    - type: recall_at_5
-      value: 13.816
-  - task:
-      type: Retrieval
-    dataset:
-      type: nq
-      name: MTEB NQ
-      config: default
-      split: test
-      revision: None
-    metrics:
-    - type: map_at_1
-      value: 30.026000000000003
-    - type: map_at_10
-      value: 45.429
-    - type: map_at_100
-      value: 46.45
-    - type: map_at_1000
-      value: 46.478
-    - type: map_at_3
-      value: 41.147
-    - type: map_at_5
-      value: 43.627
-    - type: mrr_at_1
-      value: 33.951
-    - type: mrr_at_10
-      value: 47.953
-    - type: mrr_at_100
-      value: 48.731
-    - type: mrr_at_1000
-      value: 48.751
-    - type: mrr_at_3
-      value: 44.39
-    - type: mrr_at_5
-      value: 46.533
-    - type: ndcg_at_1
-      value: 33.951
-    - type: ndcg_at_10
-      value: 53.24100000000001
-    - type: ndcg_at_100
-      value: 57.599999999999994
-    - type: ndcg_at_1000
-      value: 58.270999999999994
-    - type: ndcg_at_3
-      value: 45.190999999999995
-    - type: ndcg_at_5
-      value: 49.339
-    - type: precision_at_1
-      value: 33.951
-    - type: precision_at_10
-      value: 8.856
-    - type: precision_at_100
-      value: 1.133
-    - type: precision_at_1000
-      value: 0.12
-    - type: precision_at_3
-      value: 20.713
-    - type: precision_at_5
-      value: 14.838000000000001
-    - type: recall_at_1
-      value: 30.026000000000003
-    - type: recall_at_10
-      value: 74.512
-    - type: recall_at_100
-      value: 93.395
-    - type: recall_at_1000
-      value: 98.402
-    - type: recall_at_3
-      value: 53.677
-    - type: recall_at_5
-      value: 63.198
-  - task:
-      type: Retrieval
-    dataset:
-      type: quora
-      name: MTEB QuoraRetrieval
-      config: default
-      split: test
-      revision: None
-    metrics:
-    - type: map_at_1
-      value: 71.41300000000001
-    - type: map_at_10
-      value: 85.387
-    - type: map_at_100
-      value: 86.027
-    - type: map_at_1000
-      value: 86.041
-    - type: map_at_3
-      value: 82.543
-    - type: map_at_5
-      value: 84.304
-    - type: mrr_at_1
-      value: 82.35
-    - type: mrr_at_10
-      value: 88.248
-    - type: mrr_at_100
-      value: 88.348
-    - type: mrr_at_1000
-      value: 88.349
-    - type: mrr_at_3
-      value: 87.348
-    - type: mrr_at_5
-      value: 87.96300000000001
-    - type: ndcg_at_1
-      value: 82.37
-    - type: ndcg_at_10
-      value: 88.98
-    - type: ndcg_at_100
-      value: 90.16499999999999
-    - type: ndcg_at_1000
-      value: 90.239
-    - type: ndcg_at_3
-      value: 86.34100000000001
-    - type: ndcg_at_5
-      value: 87.761
-    - type: precision_at_1
-      value: 82.37
-    - type: precision_at_10
-      value: 13.471
-    - type: precision_at_100
-      value: 1.534
-    - type: precision_at_1000
-      value: 0.157
-    - type: precision_at_3
-      value: 37.827
-    - type: precision_at_5
-      value: 24.773999999999997
-    - type: recall_at_1
-      value: 71.41300000000001
-    - type: recall_at_10
-      value: 95.748
-    - type: recall_at_100
-      value: 99.69200000000001
-    - type: recall_at_1000
-      value: 99.98
-    - type: recall_at_3
-      value: 87.996
-    - type: recall_at_5
-      value: 92.142
-  - task:
-      type: Clustering
-    dataset:
-      type: mteb/reddit-clustering
-      name: MTEB RedditClustering
-      config: default
-      split: test
-      revision: 24640382cdbf8abc73003fb0fa6d111a705499eb
-    metrics:
-    - type: v_measure
-      value: 56.96878497780007
-  - task:
-      type: Clustering
-    dataset:
-      type: mteb/reddit-clustering-p2p
-      name: MTEB RedditClusteringP2P
-      config: default
-      split: test
-      revision: 282350215ef01743dc01b456c7f5241fa8937f16
-    metrics:
-    - type: v_measure
-      value: 65.31371347128074
-  - task:
-      type: Retrieval
-    dataset:
-      type: scidocs
-      name: MTEB SCIDOCS
-      config: default
-      split: test
-      revision: None
-    metrics:
-    - type: map_at_1
-      value: 5.287
-    - type: map_at_10
-      value: 13.530000000000001
-    - type: map_at_100
-      value: 15.891
-    - type: map_at_1000
-      value: 16.245
-    - type: map_at_3
-      value: 9.612
-    - type: map_at_5
-      value: 11.672
-    - type: mrr_at_1
-      value: 26
-    - type: mrr_at_10
-      value: 37.335
-    - type: mrr_at_100
-      value: 38.443
-    - type: mrr_at_1000
-      value: 38.486
-    - type: mrr_at_3
-      value: 33.783
-    - type: mrr_at_5
-      value: 36.028
-    - type: ndcg_at_1
-      value: 26
-    - type: ndcg_at_10
-      value: 22.215
-    - type: ndcg_at_100
-      value: 31.101
-    - type: ndcg_at_1000
-      value: 36.809
-    - type: ndcg_at_3
-      value: 21.104
-    - type: ndcg_at_5
-      value: 18.759999999999998
-    - type: precision_at_1
-      value: 26
-    - type: precision_at_10
-      value: 11.43
-    - type: precision_at_100
-      value: 2.424
-    - type: precision_at_1000
-      value: 0.379
-    - type: precision_at_3
-      value: 19.7
-    - type: precision_at_5
-      value: 16.619999999999997
-    - type: recall_at_1
-      value: 5.287
-    - type: recall_at_10
-      value: 23.18
-    - type: recall_at_100
-      value: 49.208
-    - type: recall_at_1000
-      value: 76.85300000000001
-    - type: recall_at_3
-      value: 11.991999999999999
-    - type: recall_at_5
-      value: 16.85
-  - task:
-      type: STS
-    dataset:
-      type: mteb/sickr-sts
-      name: MTEB SICK-R
-      config: default
-      split: test
-      revision: a6ea5a8cab320b040a23452cc28066d9beae2cee
-    metrics:
-    - type: cos_sim_pearson
-      value: 83.87834913790886
-    - type: cos_sim_spearman
-      value: 81.04583513112122
-    - type: euclidean_pearson
-      value: 81.20484174558065
-    - type: euclidean_spearman
-      value: 80.76430832561769
-    - type: manhattan_pearson
-      value: 81.21416730978615
-    - type: manhattan_spearman
-      value: 80.7797637394211
-  - task:
-      type: STS
-    dataset:
-      type: mteb/sts12-sts
-      name: MTEB STS12
-      config: default
-      split: test
-      revision: a0d554a64d88156834ff5ae9920b964011b16384
-    metrics:
-    - type: cos_sim_pearson
-      value: 86.56143998865157
-    - type: cos_sim_spearman
-      value: 79.75387012744471
-    - type: euclidean_pearson
-      value: 83.7877519997019
-    - type: euclidean_spearman
-      value: 79.90489748003296
-    - type: manhattan_pearson
-      value: 83.7540590666095
-    - type: manhattan_spearman
-      value: 79.86434577931573
-  - task:
-      type: STS
-    dataset:
-      type: mteb/sts13-sts
-      name: MTEB STS13
-      config: default
-      split: test
-      revision: 7e90230a92c190f1bf69ae9002b8cea547a64cca
-    metrics:
-    - type: cos_sim_pearson
-      value: 83.92102564177941
-    - type: cos_sim_spearman
-      value: 84.98234585939103
-    - type: euclidean_pearson
-      value: 84.47729567593696
-    - type: euclidean_spearman
-      value: 85.09490696194469
-    - type: manhattan_pearson
-      value: 84.38622951588229
-    - type: manhattan_spearman
-      value: 85.02507171545574
-  - task:
-      type: STS
-    dataset:
-      type: mteb/sts14-sts
-      name: MTEB STS14
-      config: default
-      split: test
-      revision: 6031580fec1f6af667f0bd2da0a551cf4f0b2375
-    metrics:
-    - type: cos_sim_pearson
-      value: 80.1891164763377
-    - type: cos_sim_spearman
-      value: 80.7997969966883
-    - type: euclidean_pearson
-      value: 80.48572256162396
-    - type: euclidean_spearman
-      value: 80.57851903536378
-    - type: manhattan_pearson
-      value: 80.4324819433651
-    - type: manhattan_spearman
-      value: 80.5074526239062
-  - task:
-      type: STS
-    dataset:
-      type: mteb/sts15-sts
-      name: MTEB STS15
-      config: default
-      split: test
-      revision: ae752c7c21bf194d8b67fd573edf7ae58183cbe3
-    metrics:
-    - type: cos_sim_pearson
-      value: 82.64319975116025
-    - type: cos_sim_spearman
-      value: 84.88671197763652
-    - type: euclidean_pearson
-      value: 84.74692193293231
-    - type: euclidean_spearman
-      value: 85.27151722073653
-    - type: manhattan_pearson
-      value: 84.72460516785438
-    - type: manhattan_spearman
-      value: 85.26518899786687
-  - task:
-      type: STS
-    dataset:
-      type: mteb/sts16-sts
-      name: MTEB STS16
-      config: default
-      split: test
-      revision: 4d8694f8f0e0100860b497b999b3dbed754a0513
-    metrics:
-    - type: cos_sim_pearson
-      value: 83.24687565822381
-    - type: cos_sim_spearman
-      value: 85.60418454111263
-    - type: euclidean_pearson
-      value: 84.85829740169851
-    - type: euclidean_spearman
-      value: 85.66378014138306
-    - type: manhattan_pearson
-      value: 84.84672408808835
-    - type: manhattan_spearman
-      value: 85.63331924364891
-  - task:
-      type: STS
-    dataset:
-      type: mteb/sts17-crosslingual-sts
-      name: MTEB STS17 (en-en)
-      config: en-en
-      split: test
-      revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d
-    metrics:
-    - type: cos_sim_pearson
-      value: 84.87758895415485
-    - type: cos_sim_spearman
-      value: 85.8193745617297
-    - type: euclidean_pearson
-      value: 85.78719118848134
-    - type: euclidean_spearman
-      value: 84.35797575385688
-    - type: manhattan_pearson
-      value: 85.97919844815692
-    - type: manhattan_spearman
-      value: 84.58334745175151
-  - task:
-      type: STS
-    dataset:
-      type: mteb/sts22-crosslingual-sts
-      name: MTEB STS22 (en)
-      config: en
-      split: test
-      revision: 6d1ba47164174a496b7fa5d3569dae26a6813b80
-    metrics:
-    - type: cos_sim_pearson
-      value: 67.27076035963599
-    - type: cos_sim_spearman
-      value: 67.21433656439973
-    - type: euclidean_pearson
-      value: 68.07434078679324
-    - type: euclidean_spearman
-      value: 66.0249731719049
-    - type: manhattan_pearson
-      value: 67.95495198947476
-    - type: manhattan_spearman
-      value: 65.99893908331886
-  - task:
-      type: STS
-    dataset:
-      type: mteb/stsbenchmark-sts
-      name: MTEB STSBenchmark
-      config: default
-      split: test
-      revision: b0fddb56ed78048fa8b90373c8a3cfc37b684831
-    metrics:
-    - type: cos_sim_pearson
-      value: 82.22437747056817
-    - type: cos_sim_spearman
-      value: 85.0995685206174
-    - type: euclidean_pearson
-      value: 84.08616925603394
-    - type: euclidean_spearman
-      value: 84.89633925691658
-    - type: manhattan_pearson
-      value: 84.08332675923133
-    - type: manhattan_spearman
-      value: 84.8858228112915
-  - task:
-      type: Reranking
-    dataset:
-      type: mteb/scidocs-reranking
-      name: MTEB SciDocsRR
-      config: default
-      split: test
-      revision: d3c5e1fc0b855ab6097bf1cda04dd73947d7caab
-    metrics:
-    - type: map
-      value: 87.6909022589666
-    - type: mrr
-      value: 96.43341952165481
-  - task:
-      type: Retrieval
-    dataset:
-      type: scifact
-      name: MTEB SciFact
-      config: default
-      split: test
-      revision: None
-    metrics:
-    - type: map_at_1
-      value: 57.660999999999994
-    - type: map_at_10
-      value: 67.625
-    - type: map_at_100
-      value: 68.07600000000001
-    - type: map_at_1000
-      value: 68.10199999999999
-    - type: map_at_3
-      value: 64.50399999999999
-    - type: map_at_5
-      value: 66.281
-    - type: mrr_at_1
-      value: 61
-    - type: mrr_at_10
-      value: 68.953
-    - type: mrr_at_100
-      value: 69.327
-    - type: mrr_at_1000
-      value: 69.352
-    - type: mrr_at_3
-      value: 66.833
-    - type: mrr_at_5
-      value: 68.05
-    - type: ndcg_at_1
-      value: 61
-    - type: ndcg_at_10
-      value: 72.369
-    - type: ndcg_at_100
-      value: 74.237
-    - type: ndcg_at_1000
-      value: 74.939
-    - type: ndcg_at_3
-      value: 67.284
-    - type: ndcg_at_5
-      value: 69.72500000000001
-    - type: precision_at_1
-      value: 61
-    - type: precision_at_10
-      value: 9.733
-    - type: precision_at_100
-      value: 1.0670000000000002
-    - type: precision_at_1000
-      value: 0.11199999999999999
-    - type: precision_at_3
-      value: 26.222
-    - type: precision_at_5
-      value: 17.4
-    - type: recall_at_1
-      value: 57.660999999999994
-    - type: recall_at_10
-      value: 85.656
-    - type: recall_at_100
-      value: 93.833
-    - type: recall_at_1000
-      value: 99.333
-    - type: recall_at_3
-      value: 71.961
-    - type: recall_at_5
-      value: 78.094
-  - task:
-      type: PairClassification
-    dataset:
-      type: mteb/sprintduplicatequestions-pairclassification
-      name: MTEB SprintDuplicateQuestions
-      config: default
-      split: test
-      revision: d66bd1f72af766a5cc4b0ca5e00c162f89e8cc46
-    metrics:
-    - type: cos_sim_accuracy
-      value: 99.86930693069307
-    - type: cos_sim_ap
-      value: 96.76685487950894
-    - type: cos_sim_f1
-      value: 93.44587884806354
-    - type: cos_sim_precision
-      value: 92.80078895463511
-    - type: cos_sim_recall
-      value: 94.1
-    - type: dot_accuracy
-      value: 99.54356435643564
-    - type: dot_ap
-      value: 81.18659960405607
-    - type: dot_f1
-      value: 75.78008915304605
-    - type: dot_precision
-      value: 75.07360157016683
-    - type: dot_recall
-      value: 76.5
-    - type: euclidean_accuracy
-      value: 99.87326732673267
-    - type: euclidean_ap
-      value: 96.8102411908941
-    - type: euclidean_f1
-      value: 93.6127744510978
-    - type: euclidean_precision
-      value: 93.42629482071713
-    - type: euclidean_recall
-      value: 93.8
-    - type: manhattan_accuracy
-      value: 99.87425742574257
-    - type: manhattan_ap
-      value: 96.82857341435529
-    - type: manhattan_f1
-      value: 93.62129583124059
-    - type: manhattan_precision
-      value: 94.04641775983855
-    - type: manhattan_recall
-      value: 93.2
-    - type: max_accuracy
-      value: 99.87425742574257
-    - type: max_ap
-      value: 96.82857341435529
-    - type: max_f1
-      value: 93.62129583124059
-  - task:
-      type: Clustering
-    dataset:
-      type: mteb/stackexchange-clustering
-      name: MTEB StackExchangeClustering
-      config: default
-      split: test
-      revision: 6cbc1f7b2bc0622f2e39d2c77fa502909748c259
-    metrics:
-    - type: v_measure
-      value: 65.92560972698926
-  - task:
-      type: Clustering
-    dataset:
-      type: mteb/stackexchange-clustering-p2p
-      name: MTEB StackExchangeClusteringP2P
-      config: default
-      split: test
-      revision: 815ca46b2622cec33ccafc3735d572c266efdb44
-    metrics:
-    - type: v_measure
-      value: 34.92797240259008
-  - task:
-      type: Reranking
-    dataset:
-      type: mteb/stackoverflowdupquestions-reranking
-      name: MTEB StackOverflowDupQuestions
-      config: default
-      split: test
-      revision: e185fbe320c72810689fc5848eb6114e1ef5ec69
-    metrics:
-    - type: map
-      value: 55.244624045597654
-    - type: mrr
-      value: 56.185303666921314
-  - task:
-      type: Summarization
-    dataset:
-      type: mteb/summeval
-      name: MTEB SummEval
-      config: default
-      split: test
-      revision: cda12ad7615edc362dbf25a00fdd61d3b1eaf93c
-    metrics:
-    - type: cos_sim_pearson
-      value: 31.02491987312937
-    - type: cos_sim_spearman
-      value: 32.055592206679734
-    - type: dot_pearson
-      value: 24.731627575422557
-    - type: dot_spearman
-      value: 24.308029077069733
-  - task:
-      type: Retrieval
-    dataset:
-      type: trec-covid
-      name: MTEB TRECCOVID
-      config: default
-      split: test
-      revision: None
-    metrics:
-    - type: map_at_1
-      value: 0.231
-    - type: map_at_10
-      value: 1.899
-    - type: map_at_100
-      value: 9.498
-    - type: map_at_1000
-      value: 20.979999999999997
-    - type: map_at_3
-      value: 0.652
-    - type: map_at_5
-      value: 1.069
-    - type: mrr_at_1
-      value: 88
-    - type: mrr_at_10
-      value: 93.4
-    - type: mrr_at_100
-      value: 93.4
-    - type: mrr_at_1000
-      value: 93.4
-    - type: mrr_at_3
-      value: 93
-    - type: mrr_at_5
-      value: 93.4
-    - type: ndcg_at_1
-      value: 86
-    - type: ndcg_at_10
-      value: 75.375
-    - type: ndcg_at_100
-      value: 52.891999999999996
-    - type: ndcg_at_1000
-      value: 44.952999999999996
-    - type: ndcg_at_3
-      value: 81.05
-    - type: ndcg_at_5
-      value: 80.175
-    - type: precision_at_1
-      value: 88
-    - type: precision_at_10
-      value: 79
-    - type: precision_at_100
-      value: 53.16
-    - type: precision_at_1000
-      value: 19.408
-    - type: precision_at_3
-      value: 85.333
-    - type: precision_at_5
-      value: 84
-    - type: recall_at_1
-      value: 0.231
-    - type: recall_at_10
-      value: 2.078
-    - type: recall_at_100
-      value: 12.601
-    - type: recall_at_1000
-      value: 41.296
-    - type: recall_at_3
-      value: 0.6779999999999999
-    - type: recall_at_5
2382
- value: 1.1360000000000001
2383
- - task:
2384
- type: Retrieval
2385
- dataset:
2386
- type: webis-touche2020
2387
- name: MTEB Touche2020
2388
- config: default
2389
- split: test
2390
- revision: None
2391
- metrics:
2392
- - type: map_at_1
2393
- value: 2.782
2394
- - type: map_at_10
2395
- value: 10.204
2396
- - type: map_at_100
2397
- value: 16.176
2398
- - type: map_at_1000
2399
- value: 17.456
2400
- - type: map_at_3
2401
- value: 5.354
2402
- - type: map_at_5
2403
- value: 7.503
2404
- - type: mrr_at_1
2405
- value: 40.816
2406
- - type: mrr_at_10
2407
- value: 54.010000000000005
2408
- - type: mrr_at_100
2409
- value: 54.49
2410
- - type: mrr_at_1000
2411
- value: 54.49
2412
- - type: mrr_at_3
2413
- value: 48.980000000000004
2414
- - type: mrr_at_5
2415
- value: 51.735
2416
- - type: ndcg_at_1
2417
- value: 36.735
2418
- - type: ndcg_at_10
2419
- value: 26.61
2420
- - type: ndcg_at_100
2421
- value: 36.967
2422
- - type: ndcg_at_1000
2423
- value: 47.274
2424
- - type: ndcg_at_3
2425
- value: 30.363
2426
- - type: ndcg_at_5
2427
- value: 29.448999999999998
2428
- - type: precision_at_1
2429
- value: 40.816
2430
- - type: precision_at_10
2431
- value: 23.878
2432
- - type: precision_at_100
2433
- value: 7.693999999999999
2434
- - type: precision_at_1000
2435
- value: 1.4489999999999998
2436
- - type: precision_at_3
2437
- value: 31.293
2438
- - type: precision_at_5
2439
- value: 29.796
2440
- - type: recall_at_1
2441
- value: 2.782
2442
- - type: recall_at_10
2443
- value: 16.485
2444
- - type: recall_at_100
2445
- value: 46.924
2446
- - type: recall_at_1000
2447
- value: 79.365
2448
- - type: recall_at_3
2449
- value: 6.52
2450
- - type: recall_at_5
2451
- value: 10.48
2452
- - task:
2453
- type: Classification
2454
- dataset:
2455
- type: mteb/toxic_conversations_50k
2456
- name: MTEB ToxicConversationsClassification
2457
- config: default
2458
- split: test
2459
- revision: d7c0de2777da35d6aae2200a62c6e0e5af397c4c
2460
- metrics:
2461
- - type: accuracy
2462
- value: 70.08300000000001
2463
- - type: ap
2464
- value: 13.91559884590195
2465
- - type: f1
2466
- value: 53.956838444291364
2467
- - task:
2468
- type: Classification
2469
- dataset:
2470
- type: mteb/tweet_sentiment_extraction
2471
- name: MTEB TweetSentimentExtractionClassification
2472
- config: default
2473
- split: test
2474
- revision: d604517c81ca91fe16a244d1248fc021f9ecee7a
2475
- metrics:
2476
- - type: accuracy
2477
- value: 59.34069043576683
2478
- - type: f1
2479
- value: 59.662041994618406
2480
- - task:
2481
- type: Clustering
2482
- dataset:
2483
- type: mteb/twentynewsgroups-clustering
2484
- name: MTEB TwentyNewsgroupsClustering
2485
- config: default
2486
- split: test
2487
- revision: 6125ec4e24fa026cec8a478383ee943acfbd5449
2488
- metrics:
2489
- - type: v_measure
2490
- value: 53.70780611078653
2491
- - task:
2492
- type: PairClassification
2493
- dataset:
2494
- type: mteb/twittersemeval2015-pairclassification
2495
- name: MTEB TwitterSemEval2015
2496
- config: default
2497
- split: test
2498
- revision: 70970daeab8776df92f5ea462b6173c0b46fd2d1
2499
- metrics:
2500
- - type: cos_sim_accuracy
2501
- value: 87.10734934732073
2502
- - type: cos_sim_ap
2503
- value: 77.58349999516054
2504
- - type: cos_sim_f1
2505
- value: 70.25391395868965
2506
- - type: cos_sim_precision
2507
- value: 70.06035161374967
2508
- - type: cos_sim_recall
2509
- value: 70.44854881266491
2510
- - type: dot_accuracy
2511
- value: 80.60439887941826
2512
- - type: dot_ap
2513
- value: 54.52935200483575
2514
- - type: dot_f1
2515
- value: 54.170444242973716
2516
- - type: dot_precision
2517
- value: 47.47715534366309
2518
- - type: dot_recall
2519
- value: 63.06068601583114
2520
- - type: euclidean_accuracy
2521
- value: 87.26828396018358
2522
- - type: euclidean_ap
2523
- value: 78.00158454104036
2524
- - type: euclidean_f1
2525
- value: 70.70292457670601
2526
- - type: euclidean_precision
2527
- value: 68.79680479281079
2528
- - type: euclidean_recall
2529
- value: 72.71767810026385
2530
- - type: manhattan_accuracy
2531
- value: 87.11330988853788
2532
- - type: manhattan_ap
2533
- value: 77.92527099601855
2534
- - type: manhattan_f1
2535
- value: 70.76488706365502
2536
- - type: manhattan_precision
2537
- value: 68.89055472263868
2538
- - type: manhattan_recall
2539
- value: 72.74406332453826
2540
- - type: max_accuracy
2541
- value: 87.26828396018358
2542
- - type: max_ap
2543
- value: 78.00158454104036
2544
- - type: max_f1
2545
- value: 70.76488706365502
2546
- - task:
2547
- type: PairClassification
2548
- dataset:
2549
- type: mteb/twitterurlcorpus-pairclassification
2550
- name: MTEB TwitterURLCorpus
2551
- config: default
2552
- split: test
2553
- revision: 8b6510b0b1fa4e4c4f879467980e9be563ec1cdf
2554
- metrics:
2555
- - type: cos_sim_accuracy
2556
- value: 87.80804905499282
2557
- - type: cos_sim_ap
2558
- value: 83.06187782630936
2559
- - type: cos_sim_f1
2560
- value: 74.99716435403985
2561
- - type: cos_sim_precision
2562
- value: 73.67951860931579
2563
- - type: cos_sim_recall
2564
- value: 76.36279642747151
2565
- - type: dot_accuracy
2566
- value: 81.83141227151008
2567
- - type: dot_ap
2568
- value: 67.18241090841795
2569
- - type: dot_f1
2570
- value: 62.216037571751606
2571
- - type: dot_precision
2572
- value: 56.749381227391005
2573
- - type: dot_recall
2574
- value: 68.84816753926701
2575
- - type: euclidean_accuracy
2576
- value: 87.91671517832887
2577
- - type: euclidean_ap
2578
- value: 83.56538942001427
2579
- - type: euclidean_f1
2580
- value: 75.7327253337256
2581
- - type: euclidean_precision
2582
- value: 72.48856036606828
2583
- - type: euclidean_recall
2584
- value: 79.28087465352634
2585
- - type: manhattan_accuracy
2586
- value: 87.86626304963713
2587
- - type: manhattan_ap
2588
- value: 83.52939841172832
2589
- - type: manhattan_f1
2590
- value: 75.73635656329888
2591
- - type: manhattan_precision
2592
- value: 72.99150182103836
2593
- - type: manhattan_recall
2594
- value: 78.69571912534647
2595
- - type: max_accuracy
2596
- value: 87.91671517832887
2597
- - type: max_ap
2598
- value: 83.56538942001427
2599
- - type: max_f1
2600
- value: 75.73635656329888
2601
  license: mit
2602
  language:
2603
  - en
2604
- pipeline_tag: sentence-similarity
2605
  ---

  <h1 align="center">FlagEmbedding</h1>


@@ -2613,11 +20,14 @@ pipeline_tag: sentence-similarity
  <a href="#usage">Usage</a> |
  <a href="#evaluation">Evaluation</a> |
  <a href="#train">Train</a> |
  <a href="#license">License</a>
  <p>
  </h4>

- For more details, please refer to our GitHub repo: [FlagEmbedding](https://github.com/FlagOpen/FlagEmbedding).

  [English](README.md) | [中文](https://github.com/FlagOpen/FlagEmbedding/blob/master/README_zh.md)

@@ -2625,6 +35,11 @@ FlagEmbedding can map any text to a low-dimensional dense vector which can be us
  And it also can be used in vector databases for LLMs.

  ************* 🌟**Updates**🌟 *************
  - 08/05/2023: Release base-scale and small-scale models, **best performance among the models of the same size 🤗**
  - 08/02/2023: Release `bge-large-*` (short for BAAI General Embedding) models, **rank 1st on MTEB and C-MTEB benchmark!** :tada: :tada:
  - 08/01/2023: We release the [Chinese Massive Text Embedding Benchmark](https://github.com/FlagOpen/FlagEmbedding/blob/master/C_MTEB) (**C-MTEB**), consisting of 31 test datasets.
@@ -2634,88 +49,182 @@ And it also can be used in vector databases for LLMs.

  `bge` is short for `BAAI general embedding`.

- | Model | Language | Description | query instruction for retrieval |
- |:-------------------------------|:--------:|:--------:|:--------:|
- | [BAAI/bge-large-en](https://huggingface.co/BAAI/bge-large-en) | English | :trophy: rank **1st** in [MTEB](https://huggingface.co/spaces/mteb/leaderboard) leaderboard | `Represent this sentence for searching relevant passages: ` |
- | [BAAI/bge-base-en](https://huggingface.co/BAAI/bge-base-en) | English | rank **2nd** in [MTEB](https://huggingface.co/spaces/mteb/leaderboard) leaderboard | `Represent this sentence for searching relevant passages: ` |
- | [BAAI/bge-small-en](https://huggingface.co/BAAI/bge-small-en) | English | a small-scale model but with competitive performance | `Represent this sentence for searching relevant passages: ` |
- | [BAAI/bge-large-zh](https://huggingface.co/BAAI/bge-large-zh) | Chinese | :trophy: rank **1st** in [C-MTEB](https://github.com/FlagOpen/FlagEmbedding/tree/master/C_MTEB) benchmark | `为这个句子生成表示以用于检索相关文章:` |
- | [BAAI/bge-large-zh-noinstruct](https://huggingface.co/BAAI/bge-large-zh-noinstruct) | Chinese | This model is trained without instruction and ranks **2nd** in the [C-MTEB](https://github.com/FlagOpen/FlagEmbedding/tree/master/C_MTEB) benchmark | |
- | [BAAI/bge-base-zh](https://huggingface.co/BAAI/bge-base-zh) | Chinese | a base-scale model with ability similar to `bge-large-zh` | `为这个句子生成表示以用于检索相关文章:` |
- | [BAAI/bge-small-zh](https://huggingface.co/BAAI/bge-small-zh) | Chinese | a small-scale model but with competitive performance | `为这个句子生成表示以用于检索相关文章:` |


  ## Usage

- * **Using FlagEmbedding**
  ```
  pip install -U FlagEmbedding
  ```
- See [FlagEmbedding](https://github.com/FlagOpen/FlagEmbedding/blob/master/FlagEmbedding/baai_general_embedding/README.md) for more ways to install FlagEmbedding.

  ```python
  from FlagEmbedding import FlagModel
- sentences = ["样例数据-1", "样例数据-2"]
  model = FlagModel('BAAI/bge-large-zh', query_instruction_for_retrieval="为这个句子生成表示以用于检索相关文章:")
- embeddings = model.encode(sentences)
- print(embeddings)
- # for a retrieval task, please use encode_queries(), which automatically adds the instruction to each query
- # the corpus in a retrieval task can still use encode() or encode_corpus()
  queries = ['query_1', 'query_2']
- passages = ["样例段落-1", "样例段落-2"]
  q_embeddings = model.encode_queries(queries)
  p_embeddings = model.encode(passages)
  scores = q_embeddings @ p_embeddings.T
  ```
- For the value of the argument `query_instruction_for_retrieval`, see [Model List](https://github.com/FlagOpen/FlagEmbedding/tree/master#model-list).

- FlagModel will use all available GPUs when encoding; please set `os.environ["CUDA_VISIBLE_DEVICES"]` to choose GPUs.

- * **Using Sentence-Transformers**

- Using this model is also easy when you have [sentence-transformers](https://www.SBERT.net) installed:

  ```
  pip install -U sentence-transformers
  ```
  ```python
  from sentence_transformers import SentenceTransformer
- sentences = ["样例数据-1", "样例数据-2"]
  model = SentenceTransformer('BAAI/bge-large-zh')
- embeddings = model.encode(sentences, normalize_embeddings=True)
- print(embeddings)
  ```
- For a retrieval task,
- each query should start with an instruction (for instructions, see [Model List](https://github.com/FlagOpen/FlagEmbedding/tree/master#model-list)).
  ```python
  from sentence_transformers import SentenceTransformer
- queries = ["手机开不了机怎么办?"]
- passages = ["样例段落-1", "样例段落-2"]
  instruction = "为这个句子生成表示以用于检索相关文章:"
  model = SentenceTransformer('BAAI/bge-large-zh')
  q_embeddings = model.encode([instruction+q for q in queries], normalize_embeddings=True)
  p_embeddings = model.encode(passages, normalize_embeddings=True)
  scores = q_embeddings @ p_embeddings.T
  ```

- * **Using HuggingFace Transformers**

- With the transformers package, you can use the model like this: First, you pass your input through the transformer model, then you select the last hidden state of the first token (i.e., [CLS]) as the sentence embedding.

  ```python
  from transformers import AutoTokenizer, AutoModel
  import torch
  # Sentences we want sentence embeddings for
  sentences = ["样例数据-1", "样例数据-2"]
  # Load model from HuggingFace Hub
  tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-large-zh')
  model = AutoModel.from_pretrained('BAAI/bge-large-zh')
  # Tokenize sentences
  encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
- # for a retrieval task, add an instruction to each query
  # encoded_input = tokenizer([instruction + q for q in queries], padding=True, truncation=True, return_tensors='pt')
  # Compute token embeddings
  with torch.no_grad():
      model_output = model(**encoded_input)
@@ -2726,21 +235,65 @@ sentence_embeddings = torch.nn.functional.normalize(sentence_embeddings, p=2, di
  print("Sentence embeddings:", sentence_embeddings)
  ```

  ## Evaluation
  `baai-general-embedding` models achieve **state-of-the-art performance on both the MTEB and C-MTEB leaderboards!**
- For more details and evaluation tools, see our [scripts](https://github.com/FlagOpen/FlagEmbedding/blob/master/C_MTEB/README.md).

  - **MTEB**:

  | Model Name | Dimension | Sequence Length | Average (56) | Retrieval (15) | Clustering (11) | Pair Classification (3) | Reranking (4) | STS (10) | Summarization (1) | Classification (12) |
  |:----:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
- | [**bge-large-en**](https://huggingface.co/BAAI/bge-large-en) | 1024 | 512 | **63.98** | **53.9** | **46.98** | 85.8 | **59.48** | 81.56 | 32.06 | **76.21** |
- | [**bge-base-en**](https://huggingface.co/BAAI/bge-base-en) | 768 | 512 | 63.36 | 53.0 | 46.32 | 85.86 | 58.7 | 81.84 | 29.27 | 75.27 |
  | [gte-large](https://huggingface.co/thenlper/gte-large) | 1024 | 512 | 63.13 | 52.22 | 46.84 | 85.00 | 59.13 | 83.35 | 31.66 | 73.33 |
  | [gte-base](https://huggingface.co/thenlper/gte-base) | 768 | 512 | 62.39 | 51.14 | 46.2 | 84.57 | 58.61 | 82.3 | 31.17 | 73.01 |
  | [e5-large-v2](https://huggingface.co/intfloat/e5-large-v2) | 1024 | 512 | 62.25 | 50.56 | 44.49 | 86.03 | 56.61 | 82.05 | 30.19 | 75.24 |
- | [**bge-small-en**](https://huggingface.co/BAAI/bge-small-en) | 384 | 512 | 62.11 | 51.82 | 44.31 | 83.78 | 57.97 | 80.72 | 30.53 | 74.37 |
  | [instructor-xl](https://huggingface.co/hkunlp/instructor-xl) | 768 | 512 | 61.79 | 49.26 | 44.74 | 86.62 | 57.29 | 83.06 | 32.32 | 61.79 |
  | [e5-base-v2](https://huggingface.co/intfloat/e5-base-v2) | 768 | 512 | 61.5 | 50.29 | 43.80 | 85.73 | 55.91 | 81.05 | 30.28 | 73.84 |
  | [gte-small](https://huggingface.co/thenlper/gte-small) | 384 | 512 | 61.36 | 49.46 | 44.89 | 83.54 | 57.7 | 82.07 | 30.42 | 72.31 |
@@ -2749,85 +302,80 @@ More details and evaluation tools see our [scripts](https://github.com/FlagOpen/
  | [sentence-t5-xxl](https://huggingface.co/sentence-transformers/sentence-t5-xxl) | 768 | 512 | 59.51 | 42.24 | 43.72 | 85.06 | 56.42 | 82.63 | 30.08 | 73.42 |
  | [all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) | 768 | 514 | 57.78 | 43.81 | 43.69 | 83.04 | 59.36 | 80.28 | 27.49 | 65.07 |
  | [sgpt-bloom-7b1-msmarco](https://huggingface.co/bigscience/sgpt-bloom-7b1-msmarco) | 4096 | 2048 | 57.59 | 48.22 | 38.93 | 81.9 | 55.65 | 77.74 | 33.6 | 66.19 |
- | [all-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2) | 384 | 512 | 56.53 | 42.69 | 41.81 | 82.41 | 58.44 | 79.8 | 27.9 | 63.21 |
- | [all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | 384 | 512 | 56.26 | 41.95 | 42.35 | 82.37 | 58.04 | 78.9 | 30.81 | 63.05 |
- | [contriever-base-msmarco](https://huggingface.co/nthakur/contriever-base-msmarco) | 768 | 512 | 56.00 | 41.88 | 41.1 | 82.54 | 53.14 | 76.51 | 30.36 | 66.68 |
- | [sentence-t5-base](https://huggingface.co/sentence-transformers/sentence-t5-base) | 768 | 512 | 55.27 | 33.63 | 40.21 | 85.18 | 53.09 | 81.14 | 31.39 | 69.81 |


  - **C-MTEB**:
- We create a benchmark C-MTEB for Chinese text embedding which consists of 31 datasets from 6 tasks.
  Please refer to [C_MTEB](https://github.com/FlagOpen/FlagEmbedding/blob/master/C_MTEB/README.md) for a detailed introduction.

  | Model | Embedding dimension | Avg | Retrieval | STS | PairClassification | Classification | Reranking | Clustering |
  |:-------------------------------|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|
- | [**bge-large-zh**](https://huggingface.co/BAAI/bge-large-zh) | 1024 | **64.20** | **71.53** | **53.23** | **78.94** | 72.26 | **65.11** | 48.39 |
- | [**bge-large-zh-noinstruct**](https://huggingface.co/BAAI/bge-large-zh-noinstruct) | 1024 | 63.53 | 70.55 | 50.98 | 76.77 | **72.49** | 64.91 | **50.01** |
- | [**BAAI/bge-base-zh**](https://huggingface.co/BAAI/bge-base-zh) | 768 | 62.96 | 69.53 | 52.05 | 77.5 | 70.98 | 64.91 | 47.63 |
- | [**BAAI/bge-small-zh**](https://huggingface.co/BAAI/bge-small-zh) | 512 | 58.27 | 63.07 | 46.87 | 70.35 | 67.78 | 61.48 | 45.09 |
- | [m3e-base](https://huggingface.co/moka-ai/m3e-base) | 768 | 57.10 | 56.91 | 48.15 | 63.99 | 70.28 | 59.34 | 47.68 |
- | [m3e-large](https://huggingface.co/moka-ai/m3e-large) | 1024 | 57.05 | 54.75 | 48.64 | 64.3 | 71.22 | 59.66 | 48.88 |
- | [text-embedding-ada-002(OpenAI)](https://platform.openai.com/docs/guides/embeddings/what-are-embeddings) | 1536 | 53.02 | 52.0 | 40.61 | 69.56 | 67.38 | 54.28 | 45.68 |
- | [luotuo](https://huggingface.co/silk-road/luotuo-bert-medium) | 1024 | 49.37 | 44.4 | 39.41 | 66.62 | 65.29 | 49.25 | 44.39 |
- | [text2vec](https://huggingface.co/shibing624/text2vec-base-chinese) | 768 | 47.63 | 38.79 | 41.71 | 67.41 | 65.18 | 49.45 | 37.66 |
- | [text2vec-large](https://huggingface.co/GanymedeNil/text2vec-large-chinese) | 1024 | 47.36 | 41.94 | 41.98 | 70.86 | 63.42 | 49.16 | 30.02 |
-
-

  ## Train
- This section introduces how we train the general embedding.
- The training scripts are in [FlagEmbedding](https://github.com/FlagOpen/FlagEmbedding/blob/master/FlagEmbedding/baai_general_embedding/README.md),
- and we provide some examples for [pre-training](https://github.com/FlagOpen/FlagEmbedding/blob/master/examples/pretrain/README.md) and [fine-tuning](https://github.com/FlagOpen/FlagEmbedding/blob/master/examples/finetune/README.md).

- **1. RetroMAE Pre-train**
- We pre-train the model following the method [RetroMAE](https://github.com/staoxiao/RetroMAE),
- which shows promising improvements on retrieval tasks ([paper](https://aclanthology.org/2022.emnlp-main.35.pdf)).
- The pre-training was conducted on 24 A100(40G) GPUs with a batch size of 720.
- In RetroMAE, the mask ratios of the encoder and decoder are 0.3 and 0.5, respectively.
- We used the AdamW optimizer with a learning rate of 2e-5.

- **Pre-training data**:
- - English:
-     - [Pile](https://pile.eleuther.ai/)
-     - [wikipedia](https://huggingface.co/datasets/wikipedia)
-     - [msmarco](https://huggingface.co/datasets/Tevatron/msmarco-passage-corpus)
- - Chinese:
-     - Subset of [wudao](https://github.com/BAAI-WuDao/Data)
-     - [baidu-baike](https://baike.baidu.com/)

- **2. Finetune**
- We fine-tune the model using a contrastive objective.
- The format of the input data is a triple `(query, positive, negative)`.
- Besides the negative in the triple, we also adopt an in-batch negatives strategy.
- We employ the cross-device negative sharing method to share negatives among different GPUs,
- which can dramatically **increase the number of negatives**.

- We trained our model on 48 A100(40G) GPUs with a large batch size of 32,768 (so there are **65,535** negatives for each query in a batch).
- We used the AdamW optimizer with a learning rate of 1e-5.
- The temperature for the contrastive loss is 0.01.

- For the versions with `*-instruction`, we add the instruction to the query for retrieval tasks during training.
- For English, the instruction is `Represent this sentence for searching relevant passages: `;
- for Chinese, the instruction is `为这个句子生成表示以用于检索相关文章:`.
- In the evaluation, the instruction should be added for sentence-to-passage retrieval tasks, and not added for other tasks.

- The finetune script is available in this repository: [FlagEmbedding](https://github.com/FlagOpen/FlagEmbedding/blob/master/FlagEmbedding/baai_general_embedding/README.md).
- You can easily finetune your model with it.

- **Training data**:

- - For English, we collect 230M text pairs from [wikipedia](https://huggingface.co/datasets/wikipedia), [cc-net](https://github.com/facebookresearch/cc_net), and so on.

- - For Chinese, we collect 120M text pairs from [wudao](https://github.com/BAAI-WuDao/Data), [simclue](https://github.com/CLUEbenchmark/SimCLUE), and so on.

- **The data collection is to be released in the future.**

- We will continually update the embedding models and training code,
- hoping to promote the development of the embedding model community.

- ## License
- FlagEmbedding is licensed under the [MIT License](https://github.com/FlagOpen/FlagEmbedding/blob/master/LICENSE). The released models can be used for commercial purposes free of charge.
  ---
+ pipeline_tag: sentence-similarity
  tags:
+ - sentence-transformers
+ - feature-extraction
+ - sentence-similarity
  - transformers
  license: mit
  language:
  - en
  ---

+
  <h1 align="center">FlagEmbedding</h1>


  <a href="#usage">Usage</a> |
  <a href="#evaluation">Evaluation</a> |
  <a href="#train">Train</a> |
+ <a href="#contact">Contact</a> |
  <a href="#license">License</a>
  <p>
  </h4>

+ For more details, please refer to our GitHub repo: [FlagEmbedding](https://github.com/FlagOpen/FlagEmbedding).

  [English](README.md) | [中文](https://github.com/FlagOpen/FlagEmbedding/blob/master/README_zh.md)

  And it also can be used in vector databases for LLMs.

  ************* 🌟**Updates**🌟 *************
+ - 09/12/2023: New release:
+   - **New reranker model**: release the cross-encoder model bge-reranker-base, which is more powerful than an embedding model. We recommend using/fine-tuning it to re-rank the top-k documents returned by embedding models.
+   - **Updated embedding models**: release bge-*-v1.5 embedding models to alleviate the issue of the similarity distribution and to enhance retrieval ability without instructions.
+ - 09/07/2023: Update [fine-tune code](https://github.com/FlagOpen/FlagEmbedding/blob/master/FlagEmbedding/baai_general_embedding/README.md): add a script to mine hard negatives and support adding instructions during fine-tuning.
+ - 08/09/2023: BGE models are integrated into **Langchain**; you can use them like [this](#using-langchain). The C-MTEB **leaderboard** is [available](https://huggingface.co/spaces/mteb/leaderboard).
  - 08/05/2023: Release base-scale and small-scale models, **best performance among the models of the same size 🤗**
  - 08/02/2023: Release `bge-large-*` (short for BAAI General Embedding) models, **rank 1st on MTEB and C-MTEB benchmark!** :tada: :tada:
  - 08/01/2023: We release the [Chinese Massive Text Embedding Benchmark](https://github.com/FlagOpen/FlagEmbedding/blob/master/C_MTEB) (**C-MTEB**), consisting of 31 test datasets.

  `bge` is short for `BAAI general embedding`.

+ | Model | Language | | Description | query instruction for retrieval\* |
+ |:-------------------------------|:--------:|:--------:|:--------:|:--------:|
+ | [BAAI/bge-reranker-large](https://huggingface.co/BAAI/bge-reranker-large) | Chinese and English | [Inference](#usage-for-reranker) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/reranker) | a cross-encoder model which is more accurate but less efficient \** | |
+ | [BAAI/bge-reranker-base](https://huggingface.co/BAAI/bge-reranker-base) | Chinese and English | [Inference](#usage-for-reranker) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/reranker) | a cross-encoder model which is more accurate but less efficient \** | |
+ | [BAAI/bge-large-en-v1.5](https://huggingface.co/BAAI/bge-large-en-v1.5) | English | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | version 1.5 with a more reasonable similarity distribution | `Represent this sentence for searching relevant passages: ` |
+ | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | English | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | version 1.5 with a more reasonable similarity distribution | `Represent this sentence for searching relevant passages: ` |
+ | [BAAI/bge-small-en-v1.5](https://huggingface.co/BAAI/bge-small-en-v1.5) | English | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | version 1.5 with a more reasonable similarity distribution | `Represent this sentence for searching relevant passages: ` |
+ | [BAAI/bge-large-zh-v1.5](https://huggingface.co/BAAI/bge-large-zh-v1.5) | Chinese | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | version 1.5 with a more reasonable similarity distribution | `为这个句子生成表示以用于检索相关文章:` |
+ | [BAAI/bge-base-zh-v1.5](https://huggingface.co/BAAI/bge-base-zh-v1.5) | Chinese | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | version 1.5 with a more reasonable similarity distribution | `为这个句子生成表示以用于检索相关文章:` |
+ | [BAAI/bge-small-zh-v1.5](https://huggingface.co/BAAI/bge-small-zh-v1.5) | Chinese | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | version 1.5 with a more reasonable similarity distribution | `为这个句子生成表示以用于检索相关文章:` |
+ | [BAAI/bge-large-en](https://huggingface.co/BAAI/bge-large-en) | English | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | :trophy: rank **1st** in [MTEB](https://huggingface.co/spaces/mteb/leaderboard) leaderboard | `Represent this sentence for searching relevant passages: ` |
+ | [BAAI/bge-base-en](https://huggingface.co/BAAI/bge-base-en) | English | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | a base-scale model with ability similar to `bge-large-en` | `Represent this sentence for searching relevant passages: ` |
+ | [BAAI/bge-small-en](https://huggingface.co/BAAI/bge-small-en) | English | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | a small-scale model but with competitive performance | `Represent this sentence for searching relevant passages: ` |
+ | [BAAI/bge-large-zh](https://huggingface.co/BAAI/bge-large-zh) | Chinese | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | :trophy: rank **1st** in [C-MTEB](https://github.com/FlagOpen/FlagEmbedding/tree/master/C_MTEB) benchmark | `为这个句子生成表示以用于检索相关文章:` |
+ | [BAAI/bge-base-zh](https://huggingface.co/BAAI/bge-base-zh) | Chinese | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | a base-scale model with ability similar to `bge-large-zh` | `为这个句子生成表示以用于检索相关文章:` |
+ | [BAAI/bge-small-zh](https://huggingface.co/BAAI/bge-small-zh) | Chinese | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | a small-scale model but with competitive performance | `为这个句子生成表示以用于检索相关文章:` |

+ \*: If you need to search for relevant passages for a query, we suggest adding the instruction to the query; in other cases, no instruction is needed, just use the original query directly. In all cases, **no instruction** needs to be added to the passages.

+ \**: To balance accuracy and time cost, a cross-encoder is widely used to re-rank the top-k documents retrieved by simpler models.
+ For example, use the bge embedding model to retrieve the top 100 relevant documents, then use the bge reranker to re-rank those 100 documents and obtain the final top-3 results.
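+
+ Below is a minimal, illustrative sketch of this retrieve-then-rerank pipeline (the model choices, the tiny toy corpus, and the cut-offs are placeholders to adapt; with such a small corpus the "top 100" is naturally capped at the corpus size):
+
+ ```python
+ import numpy as np
+ from FlagEmbedding import FlagModel, FlagReranker
+
+ query = "what is panda?"
+ corpus = ["hi",
+           "The giant panda is a bear species endemic to China.",
+           "Pandas mainly eat bamboo."]
+
+ # Stage 1: fast candidate retrieval with the bi-encoder (embedding model).
+ embedder = FlagModel('BAAI/bge-large-en',
+                      query_instruction_for_retrieval="Represent this sentence for searching relevant passages: ")
+ p_embeddings = embedder.encode(corpus)
+ q_embedding = embedder.encode_queries([query])
+ sims = (q_embedding @ p_embeddings.T)[0]
+ candidates = np.argsort(-sims)[:100]              # keep up to the top 100 candidates
+
+ # Stage 2: accurate re-ranking of the candidates with the cross-encoder.
+ reranker = FlagReranker('BAAI/bge-reranker-base', use_fp16=True)
+ rerank_scores = reranker.compute_score([[query, corpus[i]] for i in candidates])
+ order = np.argsort(-np.array(rerank_scores))      # best rerank score first
+ print([corpus[candidates[i]] for i in order[:3]]) # final top-3 results
+ ```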

+ ## Frequently asked questions

+ <details>
+ <summary>1. How to fine-tune the bge embedding model?</summary>

+ <!-- ### How to fine-tune bge embedding model? -->
+ Follow this [example](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) to prepare data and fine-tune your model.
+ Some suggestions:
+ - Mine hard negatives following this [example](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune#data-format), which can improve the retrieval performance; a sketch of the training-data format follows this list.
+ - If you pre-train bge on your own data, the pre-trained model cannot be used to calculate similarity directly; it must be fine-tuned with contrastive learning before computing similarity.
+ - If the accuracy of the fine-tuned model is still not high, it is recommended to use/fine-tune the cross-encoder model (bge-reranker) to re-rank the top-k results. Hard negatives are also needed to fine-tune the reranker.

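+ As a reference, the fine-tuning data is a JSONL file where each line is one training example in the triple format described in the Train section below; the field names and texts here are illustrative placeholders, so check the linked example for the exact schema:
+
+ ```python
+ import json
+
+ # One query with its positive passages and mined hard-negative passages.
+ example = {
+     "query": "what is panda?",
+     "pos": ["The giant panda is a bear species endemic to China."],
+     "neg": ["hi"]
+ }
+ with open("toy_finetune_data.jsonl", "w") as f:
+     f.write(json.dumps(example) + "\n")  # one JSON object per line
+ ```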

+ </details>

+ <details>
+ <summary>2. The similarity score between two dissimilar sentences is higher than 0.5</summary>

+ <!-- ### The similarity score between two dissimilar sentences is higher than 0.5 -->
+ **We suggest using bge v1.5, which alleviates the issue of the similarity distribution.**

+ Since we fine-tune the models by contrastive learning with a temperature of 0.01,
+ the similarity distribution of the current BGE models lies roughly in the interval \[0.6, 1\].
+ So a similarity score greater than 0.5 does not indicate that the two sentences are similar.

+ For downstream tasks such as passage retrieval or semantic similarity,
+ **what matters is the relative order of the scores, not their absolute values.**
+ If you need to filter similar sentences based on a similarity threshold,
+ please select an appropriate threshold based on the similarity distribution on your own data (such as 0.8, 0.85, or even 0.9).

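+ A small sketch of such threshold-based filtering (the 0.85 threshold is only an assumption; tune it on your own data):
+
+ ```python
+ from sentence_transformers import SentenceTransformer
+
+ model = SentenceTransformer('BAAI/bge-large-zh')
+ sentences_1 = ["样例数据-1", "样例数据-2"]
+ sentences_2 = ["样例数据-3", "样例数据-4"]
+ # with normalized embeddings, the dot product equals cosine similarity
+ emb_1 = model.encode(sentences_1, normalize_embeddings=True)
+ emb_2 = model.encode(sentences_2, normalize_embeddings=True)
+ similarity = emb_1 @ emb_2.T
+
+ THRESHOLD = 0.85  # assumption: pick this from the score distribution on your data
+ similar_pairs = [(i, j) for i in range(len(sentences_1))
+                  for j in range(len(sentences_2))
+                  if similarity[i, j] >= THRESHOLD]
+ print(similar_pairs)
+ ```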
+ </details>

+ <details>
+ <summary>3. When does the query instruction need to be used</summary>

+ <!-- ### When does the query instruction need to be used -->

+ For a retrieval task that uses short queries to find long related documents,
+ it is recommended to add instructions to these short queries.
+ **The best way to decide whether to add instructions to queries is to choose the setting that achieves better performance on your task.**
+ In all cases, no instruction needs to be added to the documents/passages.

+ </details>


  ## Usage

+ ### Usage for Embedding Model

+ Here are some examples of using `bge` models with
+ [FlagEmbedding](#using-flagembedding), [Sentence-Transformers](#using-sentence-transformers), [Langchain](#using-langchain), or [Huggingface Transformers](#using-huggingface-transformers).

+ #### Using FlagEmbedding
  ```
  pip install -U FlagEmbedding
  ```
+ If it doesn't work for you, see [FlagEmbedding](https://github.com/FlagOpen/FlagEmbedding/blob/master/FlagEmbedding/baai_general_embedding/README.md) for more ways to install FlagEmbedding.

  ```python
  from FlagEmbedding import FlagModel
+ sentences_1 = ["样例数据-1", "样例数据-2"]
+ sentences_2 = ["样例数据-3", "样例数据-4"]
  model = FlagModel('BAAI/bge-large-zh', query_instruction_for_retrieval="为这个句子生成表示以用于检索相关文章:")
+ embeddings_1 = model.encode(sentences_1)
+ embeddings_2 = model.encode(sentences_2)
+ similarity = embeddings_1 @ embeddings_2.T
+ print(similarity)

+ # for an s2p (short query to long passage) retrieval task, we suggest using encode_queries(), which automatically adds the instruction to each query
+ # the corpus in a retrieval task can still use encode() or encode_corpus(), since passages don't need the instruction
  queries = ['query_1', 'query_2']
+ passages = ["样例文档-1", "样例文档-2"]
  q_embeddings = model.encode_queries(queries)
  p_embeddings = model.encode(passages)
  scores = q_embeddings @ p_embeddings.T
  ```
+ For the value of the argument `query_instruction_for_retrieval`, see [Model List](https://github.com/FlagOpen/FlagEmbedding/tree/master#model-list).

+ By default, FlagModel will use all available GPUs when encoding. Please set `os.environ["CUDA_VISIBLE_DEVICES"]` to select specific GPUs.
+ You can also set `os.environ["CUDA_VISIBLE_DEVICES"]=""` to make all GPUs unavailable.
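+ For example (a small sketch; the environment variable must be set before the model, and hence CUDA, is initialized):
+
+ ```python
+ import os
+ os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"  # encode on GPUs 0 and 1 only
+ # os.environ["CUDA_VISIBLE_DEVICES"] = ""   # uncomment to force CPU-only encoding
+
+ from FlagEmbedding import FlagModel
+ model = FlagModel('BAAI/bge-large-zh', query_instruction_for_retrieval="为这个句子生成表示以用于检索相关文章:")
+ ```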

+ #### Using Sentence-Transformers

+ You can also use the `bge` models with [sentence-transformers](https://www.SBERT.net):

  ```
  pip install -U sentence-transformers
  ```
  ```python
  from sentence_transformers import SentenceTransformer
+ sentences_1 = ["样例数据-1", "样例数据-2"]
+ sentences_2 = ["样例数据-3", "样例数据-4"]
  model = SentenceTransformer('BAAI/bge-large-zh')
+ embeddings_1 = model.encode(sentences_1, normalize_embeddings=True)
+ embeddings_2 = model.encode(sentences_2, normalize_embeddings=True)
+ similarity = embeddings_1 @ embeddings_2.T
+ print(similarity)
  ```
+ For an s2p (short query to long passage) retrieval task,
+ each short query should start with an instruction (for instructions, see [Model List](https://github.com/FlagOpen/FlagEmbedding/tree/master#model-list)).
+ But the instruction is not needed for passages.
  ```python
  from sentence_transformers import SentenceTransformer
+ queries = ['query_1', 'query_2']
+ passages = ["样例文档-1", "样例文档-2"]
  instruction = "为这个句子生成表示以用于检索相关文章:"

  model = SentenceTransformer('BAAI/bge-large-zh')
  q_embeddings = model.encode([instruction+q for q in queries], normalize_embeddings=True)
  p_embeddings = model.encode(passages, normalize_embeddings=True)
  scores = q_embeddings @ p_embeddings.T
  ```

+ #### Using Langchain

+ You can use `bge` in langchain like this:
+ ```python
+ from langchain.embeddings import HuggingFaceBgeEmbeddings
+
+ model_name = "BAAI/bge-small-en"
+ model_kwargs = {'device': 'cuda'}
+ encode_kwargs = {'normalize_embeddings': True}  # set True to compute cosine similarity
+ model = HuggingFaceBgeEmbeddings(
+     model_name=model_name,
+     model_kwargs=model_kwargs,
+     encode_kwargs=encode_kwargs,
+     query_instruction="为这个句子生成表示以用于检索相关文章:"
+ )
+ model.query_instruction = "为这个句子生成表示以用于检索相关文章:"
+ ```
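+
+ A short usage sketch for the wrapper configured above (embed_query/embed_documents are LangChain's standard Embeddings methods; embed_query should prepend the configured query_instruction):
+
+ ```python
+ query_embedding = model.embed_query("样例数据-1")                      # instruction is prepended to the query
+ doc_embeddings = model.embed_documents(["样例文档-1", "样例文档-2"])   # no instruction for documents
+ print(len(query_embedding), len(doc_embeddings))
+ ```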

+ #### Using HuggingFace Transformers

+ With the transformers package, you can use the model like this: First, you pass your input through the transformer model, then you select the last hidden state of the first token (i.e., [CLS]) as the sentence embedding.

  ```python
  from transformers import AutoTokenizer, AutoModel
  import torch
  # Sentences we want sentence embeddings for
  sentences = ["样例数据-1", "样例数据-2"]
+
  # Load model from HuggingFace Hub
  tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-large-zh')
  model = AutoModel.from_pretrained('BAAI/bge-large-zh')
+ model.eval()
+
  # Tokenize sentences
  encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
+ # for an s2p (short query to long passage) retrieval task, add an instruction to each query (do not add it to passages)
  # encoded_input = tokenizer([instruction + q for q in queries], padding=True, truncation=True, return_tensors='pt')
+
  # Compute token embeddings
  with torch.no_grad():
      model_output = model(**encoded_input)

  print("Sentence embeddings:", sentence_embeddings)
  ```

+ ### Usage for Reranker

+ You can get a relevance score by inputting a query and a passage to the reranker.
+ The reranker is optimized with cross-entropy loss, so the relevance score is not bounded to a specific range.

+ #### Using FlagEmbedding
+ ```
+ pip install -U FlagEmbedding
+ ```

+ Get a relevance score:
+ ```python
+ from FlagEmbedding import FlagReranker
+ reranker = FlagReranker('BAAI/bge-reranker-base', use_fp16=True)  # use_fp16=True speeds up computation
+
+ score = reranker.compute_score(['query', 'passage'])
+ print(score)
+
+ scores = reranker.compute_score([['what is panda?', 'hi'], ['what is panda?', 'The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China.']])
+ print(scores)
+ ```
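+
+ Because the scores are unbounded logits, one optional post-processing step (not part of the FlagEmbedding API) is to squash them into (0, 1) with a sigmoid when a bounded score is more convenient; this continues the example above:
+
+ ```python
+ import torch
+
+ probabilities = torch.sigmoid(torch.tensor(scores))  # map unbounded logits into (0, 1)
+ print(probabilities)  # the relative order of the pairs is unchanged
+ ```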

+ #### Using Huggingface transformers

+ ```python
+ import torch
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
+
+ tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-reranker-base')
+ model = AutoModelForSequenceClassification.from_pretrained('BAAI/bge-reranker-base')
+ model.eval()
+
+ pairs = [['what is panda?', 'hi'], ['what is panda?', 'The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China.']]
+ with torch.no_grad():
+     inputs = tokenizer(pairs, padding=True, truncation=True, return_tensors='pt', max_length=512)
+     scores = model(**inputs, return_dict=True).logits.view(-1).float()
+ print(scores)
+ ```

  ## Evaluation

  `baai-general-embedding` models achieve **state-of-the-art performance on both the MTEB and C-MTEB leaderboards!**
+ For more details and evaluation tools, see our [scripts](https://github.com/FlagOpen/FlagEmbedding/blob/master/C_MTEB/README.md).

  - **MTEB**:

  | Model Name | Dimension | Sequence Length | Average (56) | Retrieval (15) | Clustering (11) | Pair Classification (3) | Reranking (4) | STS (10) | Summarization (1) | Classification (12) |
  |:----:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
+ | [BAAI/bge-large-en-v1.5](https://huggingface.co/BAAI/bge-large-en-v1.5) | 1024 | 512 | **64.23** | **54.29** | 46.08 | 87.12 | 60.03 | 83.11 | 31.61 | 75.97 |
+ | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | 768 | 512 | 63.55 | 53.25 | 45.77 | 86.55 | 58.86 | 82.4 | 31.07 | 75.53 |
+ | [BAAI/bge-small-en-v1.5](https://huggingface.co/BAAI/bge-small-en-v1.5) | 384 | 512 | 62.17 | 51.68 | 43.82 | 84.92 | 58.36 | 81.59 | 30.12 | 74.14 |
+ | [bge-large-en](https://huggingface.co/BAAI/bge-large-en) | 1024 | 512 | 63.98 | 53.9 | 46.98 | 85.8 | 59.48 | 81.56 | 32.06 | 76.21 |
+ | [bge-base-en](https://huggingface.co/BAAI/bge-base-en) | 768 | 512 | 63.36 | 53.0 | 46.32 | 85.86 | 58.7 | 81.84 | 29.27 | 75.27 |
  | [gte-large](https://huggingface.co/thenlper/gte-large) | 1024 | 512 | 63.13 | 52.22 | 46.84 | 85.00 | 59.13 | 83.35 | 31.66 | 73.33 |
  | [gte-base](https://huggingface.co/thenlper/gte-base) | 768 | 512 | 62.39 | 51.14 | 46.2 | 84.57 | 58.61 | 82.3 | 31.17 | 73.01 |
  | [e5-large-v2](https://huggingface.co/intfloat/e5-large-v2) | 1024 | 512 | 62.25 | 50.56 | 44.49 | 86.03 | 56.61 | 82.05 | 30.19 | 75.24 |
+ | [bge-small-en](https://huggingface.co/BAAI/bge-small-en) | 384 | 512 | 62.11 | 51.82 | 44.31 | 83.78 | 57.97 | 80.72 | 30.53 | 74.37 |
  | [instructor-xl](https://huggingface.co/hkunlp/instructor-xl) | 768 | 512 | 61.79 | 49.26 | 44.74 | 86.62 | 57.29 | 83.06 | 32.32 | 61.79 |
  | [e5-base-v2](https://huggingface.co/intfloat/e5-base-v2) | 768 | 512 | 61.5 | 50.29 | 43.80 | 85.73 | 55.91 | 81.05 | 30.28 | 73.84 |
  | [gte-small](https://huggingface.co/thenlper/gte-small) | 384 | 512 | 61.36 | 49.46 | 44.89 | 83.54 | 57.7 | 82.07 | 30.42 | 72.31 |
  | [sentence-t5-xxl](https://huggingface.co/sentence-transformers/sentence-t5-xxl) | 768 | 512 | 59.51 | 42.24 | 43.72 | 85.06 | 56.42 | 82.63 | 30.08 | 73.42 |
  | [all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) | 768 | 514 | 57.78 | 43.81 | 43.69 | 83.04 | 59.36 | 80.28 | 27.49 | 65.07 |
  | [sgpt-bloom-7b1-msmarco](https://huggingface.co/bigscience/sgpt-bloom-7b1-msmarco) | 4096 | 2048 | 57.59 | 48.22 | 38.93 | 81.9 | 55.65 | 77.74 | 33.6 | 66.19 |

  - **C-MTEB**:
+ We created the benchmark C-MTEB for Chinese text embedding, which consists of 31 datasets from 6 tasks.
  Please refer to [C_MTEB](https://github.com/FlagOpen/FlagEmbedding/blob/master/C_MTEB/README.md) for a detailed introduction.

  | Model | Embedding dimension | Avg | Retrieval | STS | PairClassification | Classification | Reranking | Clustering |
  |:-------------------------------|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|
+ | [**BAAI/bge-large-zh-v1.5**](https://huggingface.co/BAAI/bge-large-zh-v1.5) | 1024 | **64.53** | 70.46 | 56.25 | 81.6 | 69.13 | 65.84 | 48.99 |
+ | [BAAI/bge-base-zh-v1.5](https://huggingface.co/BAAI/bge-base-zh-v1.5) | 768 | 63.13 | 69.49 | 53.72 | 79.75 | 68.07 | 65.39 | 47.53 |
+ | [BAAI/bge-small-zh-v1.5](https://huggingface.co/BAAI/bge-small-zh-v1.5) | 512 | 57.82 | 61.77 | 49.11 | 70.41 | 63.96 | 60.92 | 44.18 |
+ | [BAAI/bge-large-zh](https://huggingface.co/BAAI/bge-large-zh) | 1024 | 64.20 | 71.53 | 54.98 | 78.94 | 68.32 | 65.11 | 48.39 |
+ | [bge-large-zh-noinstruct](https://huggingface.co/BAAI/bge-large-zh-noinstruct) | 1024 | 63.53 | 70.55 | 53 | 76.77 | 68.58 | 64.91 | 50.01 |
+ | [BAAI/bge-base-zh](https://huggingface.co/BAAI/bge-base-zh) | 768 | 62.96 | 69.53 | 54.12 | 77.5 | 67.07 | 64.91 | 47.63 |
+ | [multilingual-e5-large](https://huggingface.co/intfloat/multilingual-e5-large) | 1024 | 58.79 | 63.66 | 48.44 | 69.89 | 67.34 | 56.00 | 48.23 |
+ | [BAAI/bge-small-zh](https://huggingface.co/BAAI/bge-small-zh) | 512 | 58.27 | 63.07 | 49.45 | 70.35 | 63.64 | 61.48 | 45.09 |
+ | [m3e-base](https://huggingface.co/moka-ai/m3e-base) | 768 | 57.10 | 56.91 | 50.47 | 63.99 | 67.52 | 59.34 | 47.68 |
+ | [m3e-large](https://huggingface.co/moka-ai/m3e-large) | 1024 | 57.05 | 54.75 | 50.42 | 64.3 | 68.2 | 59.66 | 48.88 |
+ | [multilingual-e5-base](https://huggingface.co/intfloat/multilingual-e5-base) | 768 | 55.48 | 61.63 | 46.49 | 67.07 | 65.35 | 54.35 | 40.68 |
+ | [multilingual-e5-small](https://huggingface.co/intfloat/multilingual-e5-small) | 384 | 55.38 | 59.95 | 45.27 | 66.45 | 65.85 | 53.86 | 45.26 |
+ | [text-embedding-ada-002(OpenAI)](https://platform.openai.com/docs/guides/embeddings/what-are-embeddings) | 1536 | 53.02 | 52.0 | 43.35 | 69.56 | 64.31 | 54.28 | 45.68 |
+ | [luotuo](https://huggingface.co/silk-road/luotuo-bert-medium) | 1024 | 49.37 | 44.4 | 42.78 | 66.62 | 61 | 49.25 | 44.39 |
+ | [text2vec-base](https://huggingface.co/shibing624/text2vec-base-chinese) | 768 | 47.63 | 38.79 | 43.41 | 67.41 | 62.19 | 49.45 | 37.66 |
+ | [text2vec-large](https://huggingface.co/GanymedeNil/text2vec-large-chinese) | 1024 | 47.36 | 41.94 | 44.97 | 70.86 | 60.66 | 49.16 | 30.02 |

+ - **Reranking**:
+ See [C_MTEB](https://github.com/FlagOpen/FlagEmbedding/blob/master/C_MTEB/) for the evaluation script.

+ | Model | T2Reranking | T2RerankingZh2En\* | T2RerankingEn2Zh\* | MmarcoReranking | CMedQAv1 | CMedQAv2 | Avg |
+ |:-------------------------------|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|
+ | text2vec-base-multilingual | 64.66 | 62.94 | 62.51 | 14.37 | 48.46 | 48.6 | 50.26 |
+ | multilingual-e5-small | 65.62 | 60.94 | 56.41 | 29.91 | 67.26 | 66.54 | 57.78 |
+ | multilingual-e5-large | 64.55 | 61.61 | 54.28 | 28.6 | 67.42 | 67.92 | 57.4 |
+ | multilingual-e5-base | 64.21 | 62.13 | 54.68 | 29.5 | 66.23 | 66.98 | 57.29 |
+ | m3e-base | 66.03 | 62.74 | 56.07 | 17.51 | 77.05 | 76.76 | 59.36 |
+ | m3e-large | 66.13 | 62.72 | 56.1 | 16.46 | 77.76 | 78.27 | 59.57 |
+ | bge-base-zh-v1.5 | 66.49 | 63.25 | 57.02 | 29.74 | 80.47 | 84.88 | 63.64 |
+ | bge-large-zh-v1.5 | 65.74 | 63.39 | 57.03 | 28.74 | 83.45 | 85.44 | 63.97 |
+ | [BAAI/bge-reranker-base](https://huggingface.co/BAAI/bge-reranker-base) | 67.28 | 63.95 | 60.45 | 35.46 | 81.26 | 84.1 | 65.42 |
+ | [BAAI/bge-reranker-large](https://huggingface.co/BAAI/bge-reranker-large) | 67.6 | 64.03 | 61.44 | 37.16 | 82.15 | 84.18 | 66.09 |

+ \*: T2RerankingZh2En and T2RerankingEn2Zh are cross-language retrieval tasks.

  ## Train

+ ### BAAI Embedding

+ We pre-train the models using [RetroMAE](https://github.com/staoxiao/RetroMAE) and train them on large-scale pair data using contrastive learning.
+ **You can fine-tune the embedding model on your data following our [examples](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune).**
+ We also provide a [pre-train example](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/pretrain).
+ Note that the goal of pre-training is to reconstruct the text; the pre-trained model cannot be used for similarity calculation directly and needs to be fine-tuned.
+ For more training details for bge, see [baai_general_embedding](https://github.com/FlagOpen/FlagEmbedding/blob/master/FlagEmbedding/baai_general_embedding/README.md).
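+
+ To make the contrastive objective concrete, here is a minimal sketch of an InfoNCE-style loss with in-batch negatives and the temperature of 0.01 mentioned in the FAQ above (a simplified illustration, not the actual training code):
+
+ ```python
+ import torch
+ import torch.nn.functional as F
+
+ def contrastive_loss(q_emb: torch.Tensor, p_emb: torch.Tensor, temperature: float = 0.01) -> torch.Tensor:
+     """q_emb, p_emb: (batch, dim); row i of p_emb is the positive passage for query i.
+     Every other passage in the batch serves as an in-batch negative."""
+     q_emb = F.normalize(q_emb, p=2, dim=-1)
+     p_emb = F.normalize(p_emb, p=2, dim=-1)
+     logits = q_emb @ p_emb.T / temperature               # (batch, batch) similarity matrix
+     labels = torch.arange(q_emb.size(0), device=q_emb.device)
+     return F.cross_entropy(logits, labels)               # diagonal entries are the positives
+
+ loss = contrastive_loss(torch.randn(8, 1024), torch.randn(8, 1024))
+ ```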
 
 
 

+ ### BGE Reranker

+ The cross-encoder performs full attention over the input pair,
+ which is more accurate than the embedding model (i.e., bi-encoder) but more time-consuming.
+ Therefore, it can be used to re-rank the top-k documents returned by the embedding model.
+ We train the cross-encoder on multilingual pair data.
+ The data format is the same as for the embedding model, so you can fine-tune it easily following our example.
+ For more details, please refer to [./FlagEmbedding/reranker/README.md](./FlagEmbedding/reranker/README.md).

+ ## Contact
+ If you have any questions or suggestions related to this project, feel free to open an issue or pull request.
+ You can also email Shitao Xiao([email protected]) and Zheng Liu([email protected]).

+ ## License
+ FlagEmbedding is licensed under the [MIT License](https://github.com/FlagOpen/FlagEmbedding/blob/master/LICENSE). The released models can be used for commercial purposes free of charge.