diff --git "a/EXTERNAL_MODEL_RESULTS.json" "b/EXTERNAL_MODEL_RESULTS.json" --- "a/EXTERNAL_MODEL_RESULTS.json" +++ "b/EXTERNAL_MODEL_RESULTS.json" @@ -7,11 +7,11 @@ "accuracy": [ { "Model": "Baichuan-text-embedding", - "AmazonReviewsClassification": 48.3, + "AmazonReviewsClassification (zh)": 48.3, "IFlyTek": 50.75, "JDReview": 87.69, - "MassiveIntentClassification": 74.91, - "MassiveScenarioClassification": 81.28, + "MassiveIntentClassification (zh-CN)": 74.91, + "MassiveScenarioClassification (zh-CN)": 81.28, "MultilingualSentiment": 76.83, "OnlineShopping": 94.42, "TNews": 52.62, @@ -80,7 +80,7 @@ "LCQMC": 76.6, "PAWSX": 47.56, "QBQTC": 39.96, - "STS22": 65.78, + "STS22 (zh)": 65.78, "STSB": 80.14 } ] @@ -245,12 +245,12 @@ "accuracy": [ { "Model": "Cohere-embed-multilingual-light-v3.0", - "AmazonReviewsClassification": 38.6, - "MTOPDomainClassification": 80.79, - "MTOPIntentClassification": 50.01, - "MasakhaNEWSClassification": 82.58, - "MassiveIntentClassification": 56.31, - "MassiveScenarioClassification": 59.5 + "AmazonReviewsClassification (fr)": 38.6, + "MTOPDomainClassification (fr)": 80.79, + "MTOPIntentClassification (fr)": 50.01, + "MasakhaNEWSClassification (fra)": 82.58, + "MassiveIntentClassification (fr)": 56.31, + "MassiveScenarioClassification (fr)": 59.5 } ] }, @@ -263,8 +263,8 @@ "HALClusteringS2S": 17.31, "MLSUMClusteringP2P": 42.8, "MLSUMClusteringS2S": 32.72, - "MasakhaNEWSClusteringP2P": 56.81, - "MasakhaNEWSClusteringS2S": 29.41 + "MasakhaNEWSClusteringP2P (fra)": 56.81, + "MasakhaNEWSClusteringS2S (fra)": 29.41 } ] }, @@ -272,13 +272,13 @@ "max_ap": [ { "Model": "Cohere-embed-multilingual-light-v3.0", - "OpusparcusPC": 90.92, - "PawsXPairClassification": 57.32 + "OpusparcusPC (fr)": 90.92, + "PawsXPairClassification (fr)": 57.32 }, { "Model": "Cohere-embed-multilingual-light-v3.0", - "OpusparcusPC": 90.92, - "PawsXPairClassification": 57.35 + "OpusparcusPC (fr)": 90.92, + "PawsXPairClassification (fr)": 57.35 } ] }, @@ -297,9 +297,9 @@ "Model": "Cohere-embed-multilingual-light-v3.0", "AlloprofRetrieval": 35.39, "BSARDRetrieval": 0.0, - "MintakaRetrieval": 23.0, + "MintakaRetrieval (fr)": 23.0, "SyntecRetrieval": 76.88, - "XPQARetrieval": 45.23 + "XPQARetrieval (fr)": 45.23 } ] }, @@ -308,8 +308,8 @@ { "Model": "Cohere-embed-multilingual-light-v3.0", "SICKFr": 75.5, - "STS22": 82.8, - "STSBenchmarkMultilingualSTS": 76.48 + "STS22 (fr)": 82.8, + "STSBenchmarkMultilingualSTS (fr)": 76.48 } ] }, @@ -336,12 +336,12 @@ "accuracy": [ { "Model": "Cohere-embed-multilingual-v3.0", - "AmazonReviewsClassification": 41.89, - "MTOPDomainClassification": 86.23, - "MTOPIntentClassification": 61.07, - "MasakhaNEWSClassification": 83.06, - "MassiveIntentClassification": 62.94, - "MassiveScenarioClassification": 67.29 + "AmazonReviewsClassification (fr)": 41.89, + "MTOPDomainClassification (fr)": 86.23, + "MTOPIntentClassification (fr)": 61.07, + "MasakhaNEWSClassification (fra)": 83.06, + "MassiveIntentClassification (fr)": 62.94, + "MassiveScenarioClassification (fr)": 67.29 } ] }, @@ -354,8 +354,8 @@ "HALClusteringS2S": 19.9, "MLSUMClusteringP2P": 45.08, "MLSUMClusteringS2S": 34.75, - "MasakhaNEWSClusteringP2P": 53.18, - "MasakhaNEWSClusteringS2S": 32.31 + "MasakhaNEWSClusteringP2P (fra)": 53.18, + "MasakhaNEWSClusteringS2S (fra)": 32.31 } ] }, @@ -363,13 +363,13 @@ "max_ap": [ { "Model": "Cohere-embed-multilingual-v3.0", - "OpusparcusPC": 94.08, - "PawsXPairClassification": 61.26 + "OpusparcusPC (fr)": 94.08, + "PawsXPairClassification (fr)": 61.26 }, { "Model": "Cohere-embed-multilingual-v3.0", - "OpusparcusPC": 94.08, - "PawsXPairClassification": 61.26 + "OpusparcusPC (fr)": 94.08, + "PawsXPairClassification (fr)": 61.26 } ] }, @@ -388,9 +388,9 @@ "Model": "Cohere-embed-multilingual-v3.0", "AlloprofRetrieval": 38.36, "BSARDRetrieval": 0.14, - "MintakaRetrieval": 25.44, + "MintakaRetrieval (fr)": 25.44, "SyntecRetrieval": 79.27, - "XPQARetrieval": 58.87 + "XPQARetrieval (fr)": 58.87 } ] }, @@ -399,8 +399,8 @@ { "Model": "Cohere-embed-multilingual-v3.0", "SICKFr": 79.23, - "STS22": 82.76, - "STSBenchmarkMultilingualSTS": 81.84 + "STS22 (fr)": 82.76, + "STSBenchmarkMultilingualSTS (fr)": 81.84 } ] }, @@ -436,8 +436,12 @@ "DKHateClassification": 59.3, "DanishPoliticalCommentsClassification": 39.81, "LccSentimentClassification": 58.0, - "MassiveIntentClassification": 40.82, - "MassiveScenarioClassification": 40.14, + "MassiveIntentClassification (da)": 54.68, + "MassiveIntentClassification (nb)": 45.38, + "MassiveIntentClassification (sv)": 40.82, + "MassiveScenarioClassification (da)": 59.56, + "MassiveScenarioClassification (nb)": 47.55, + "MassiveScenarioClassification (sv)": 40.14, "NoRecClassification": 46.06, "NordicLangClassification": 74.25, "NorwegianParliament": 56.79, @@ -1241,8 +1245,122 @@ "f1": [ { "Model": "LASER2", - "BUCC": 97.7, - "Tatoeba": 95.41 + "BUCC (de-en)": 99.21, + "BUCC (fr-en)": 98.39, + "BUCC (ru-en)": 97.62, + "BUCC (zh-en)": 97.7, + "Tatoeba (afr-eng)": 92.59, + "Tatoeba (amh-eng)": 80.82, + "Tatoeba (ang-eng)": 25.22, + "Tatoeba (ara-eng)": 90.14, + "Tatoeba (arq-eng)": 26.63, + "Tatoeba (arz-eng)": 66.16, + "Tatoeba (ast-eng)": 76.35, + "Tatoeba (awa-eng)": 33.74, + "Tatoeba (aze-eng)": 82.41, + "Tatoeba (bel-eng)": 79.54, + "Tatoeba (ben-eng)": 89.43, + "Tatoeba (ber-eng)": 77.63, + "Tatoeba (bos-eng)": 95.86, + "Tatoeba (bre-eng)": 31.2, + "Tatoeba (bul-eng)": 93.57, + "Tatoeba (cat-eng)": 95.8, + "Tatoeba (cbk-eng)": 77.17, + "Tatoeba (ceb-eng)": 9.93, + "Tatoeba (ces-eng)": 95.52, + "Tatoeba (cha-eng)": 14.86, + "Tatoeba (cmn-eng)": 85.62, + "Tatoeba (cor-eng)": 4.45, + "Tatoeba (csb-eng)": 27.03, + "Tatoeba (cym-eng)": 5.85, + "Tatoeba (dan-eng)": 95.22, + "Tatoeba (deu-eng)": 99.07, + "Tatoeba (dsb-eng)": 42.34, + "Tatoeba (dtp-eng)": 7.39, + "Tatoeba (ell-eng)": 96.2, + "Tatoeba (epo-eng)": 96.61, + "Tatoeba (est-eng)": 96.43, + "Tatoeba (eus-eng)": 93.32, + "Tatoeba (fao-eng)": 57.04, + "Tatoeba (fin-eng)": 96.98, + "Tatoeba (fra-eng)": 94.28, + "Tatoeba (fry-eng)": 42.07, + "Tatoeba (gla-eng)": 1.52, + "Tatoeba (gle-eng)": 4.2, + "Tatoeba (glg-eng)": 96.14, + "Tatoeba (gsw-eng)": 27.52, + "Tatoeba (heb-eng)": 0.0, + "Tatoeba (hin-eng)": 95.32, + "Tatoeba (hrv-eng)": 96.72, + "Tatoeba (hsb-eng)": 45.75, + "Tatoeba (hun-eng)": 95.2, + "Tatoeba (hye-eng)": 88.72, + "Tatoeba (ido-eng)": 80.86, + "Tatoeba (ile-eng)": 87.88, + "Tatoeba (ina-eng)": 93.93, + "Tatoeba (ind-eng)": 92.98, + "Tatoeba (isl-eng)": 94.32, + "Tatoeba (ita-eng)": 94.32, + "Tatoeba (jav-eng)": 9.95, + "Tatoeba (jpn-eng)": 93.78, + "Tatoeba (kab-eng)": 65.88, + "Tatoeba (kat-eng)": 81.16, + "Tatoeba (kaz-eng)": 53.3, + "Tatoeba (khm-eng)": 74.19, + "Tatoeba (kor-eng)": 87.97, + "Tatoeba (kur-eng)": 19.09, + "Tatoeba (kzj-eng)": 4.46, + "Tatoeba (lat-eng)": 64.81, + "Tatoeba (lfn-eng)": 63.39, + "Tatoeba (lit-eng)": 96.2, + "Tatoeba (lvs-eng)": 95.33, + "Tatoeba (mal-eng)": 98.16, + "Tatoeba (mar-eng)": 92.93, + "Tatoeba (max-eng)": 36.96, + "Tatoeba (mhr-eng)": 6.86, + "Tatoeba (mkd-eng)": 93.63, + "Tatoeba (mon-eng)": 3.42, + "Tatoeba (nds-eng)": 77.13, + "Tatoeba (nld-eng)": 95.35, + "Tatoeba (nno-eng)": 72.75, + "Tatoeba (nob-eng)": 95.77, + "Tatoeba (nov-eng)": 60.02, + "Tatoeba (oci-eng)": 58.13, + "Tatoeba (orv-eng)": 23.24, + "Tatoeba (pam-eng)": 3.24, + "Tatoeba (pes-eng)": 93.13, + "Tatoeba (pms-eng)": 36.23, + "Tatoeba (pol-eng)": 97.32, + "Tatoeba (por-eng)": 94.54, + "Tatoeba (ron-eng)": 96.52, + "Tatoeba (rus-eng)": 92.58, + "Tatoeba (slk-eng)": 95.82, + "Tatoeba (slv-eng)": 95.4, + "Tatoeba (spa-eng)": 97.33, + "Tatoeba (sqi-eng)": 97.22, + "Tatoeba (srp-eng)": 93.64, + "Tatoeba (swe-eng)": 95.31, + "Tatoeba (swg-eng)": 33.1, + "Tatoeba (swh-eng)": 55.66, + "Tatoeba (tam-eng)": 87.32, + "Tatoeba (tat-eng)": 34.74, + "Tatoeba (tel-eng)": 96.72, + "Tatoeba (tgl-eng)": 63.19, + "Tatoeba (tha-eng)": 96.38, + "Tatoeba (tuk-eng)": 16.35, + "Tatoeba (tur-eng)": 98.03, + "Tatoeba (tzl-eng)": 36.56, + "Tatoeba (uig-eng)": 56.49, + "Tatoeba (ukr-eng)": 93.52, + "Tatoeba (urd-eng)": 84.23, + "Tatoeba (uzb-eng)": 23.2, + "Tatoeba (vie-eng)": 96.73, + "Tatoeba (war-eng)": 8.25, + "Tatoeba (wuu-eng)": 75.09, + "Tatoeba (xho-eng)": 4.68, + "Tatoeba (yid-eng)": 2.49, + "Tatoeba (yue-eng)": 87.75, + "Tatoeba (zsm-eng)": 95.41 } ] }, @@ -1250,17 +1368,135 @@ "accuracy": [ { "Model": "LASER2", - "AmazonCounterfactualClassification": 68.76, + "AmazonCounterfactualClassification (de)": 67.82, + "AmazonCounterfactualClassification (en)": 76.84, + "AmazonCounterfactualClassification (en-ext)": 76.17, + "AmazonCounterfactualClassification (ja)": 68.76, "AmazonPolarityClassification": 61.01, - "AmazonReviewsClassification": 30.89, + "AmazonReviewsClassification (de)": 31.07, + "AmazonReviewsClassification (en)": 28.71, + "AmazonReviewsClassification (es)": 32.72, + "AmazonReviewsClassification (fr)": 31.12, + "AmazonReviewsClassification (ja)": 28.94, + "AmazonReviewsClassification (zh)": 30.89, "Banking77Classification": 57.76, "EmotionClassification": 24.83, "ImdbClassification": 57.58, - "MTOPDomainClassification": 72.68, - "MTOPIntentClassification": 50.07, - "MasakhaNEWSClassification": 65.9, - "MassiveIntentClassification": 32.93, - "MassiveScenarioClassification": 42.32, + "MTOPDomainClassification (de)": 74.08, + "MTOPDomainClassification (en)": 75.36, + "MTOPDomainClassification (es)": 73.47, + "MTOPDomainClassification (fr)": 72.26, + "MTOPDomainClassification (hi)": 72.95, + "MTOPDomainClassification (th)": 72.68, + "MTOPIntentClassification (de)": 51.62, + "MTOPIntentClassification (en)": 49.47, + "MTOPIntentClassification (es)": 52.75, + "MTOPIntentClassification (fr)": 50.12, + "MTOPIntentClassification (hi)": 45.55, + "MTOPIntentClassification (th)": 50.07, + "MasakhaNEWSClassification (fra)": 65.9, + "MassiveIntentClassification (af)": 38.01, + "MassiveIntentClassification (am)": 12.7, + "MassiveIntentClassification (ar)": 37.16, + "MassiveIntentClassification (az)": 19.98, + "MassiveIntentClassification (bn)": 42.51, + "MassiveIntentClassification (cy)": 17.33, + "MassiveIntentClassification (da)": 45.61, + "MassiveIntentClassification (de)": 44.79, + "MassiveIntentClassification (el)": 46.71, + "MassiveIntentClassification (en)": 47.91, + "MassiveIntentClassification (es)": 45.44, + "MassiveIntentClassification (fa)": 45.01, + "MassiveIntentClassification (fi)": 45.94, + "MassiveIntentClassification (fr)": 46.13, + "MassiveIntentClassification (he)": 42.55, + "MassiveIntentClassification (hi)": 40.2, + "MassiveIntentClassification (hu)": 42.77, + "MassiveIntentClassification (hy)": 28.07, + "MassiveIntentClassification (id)": 45.81, + "MassiveIntentClassification (is)": 39.86, + "MassiveIntentClassification (it)": 48.25, + "MassiveIntentClassification (ja)": 45.3, + "MassiveIntentClassification (jv)": 24.3, + "MassiveIntentClassification (ka)": 22.7, + "MassiveIntentClassification (km)": 22.48, + "MassiveIntentClassification (kn)": 4.32, + "MassiveIntentClassification (ko)": 44.26, + "MassiveIntentClassification (lv)": 39.75, + "MassiveIntentClassification (ml)": 41.33, + "MassiveIntentClassification (mn)": 16.2, + "MassiveIntentClassification (ms)": 43.23, + "MassiveIntentClassification (my)": 25.37, + "MassiveIntentClassification (nb)": 37.74, + "MassiveIntentClassification (nl)": 45.0, + "MassiveIntentClassification (pl)": 44.99, + "MassiveIntentClassification (pt)": 48.55, + "MassiveIntentClassification (ro)": 44.3, + "MassiveIntentClassification (ru)": 44.29, + "MassiveIntentClassification (sl)": 44.72, + "MassiveIntentClassification (sq)": 46.12, + "MassiveIntentClassification (sv)": 45.95, + "MassiveIntentClassification (sw)": 31.89, + "MassiveIntentClassification (ta)": 29.63, + "MassiveIntentClassification (te)": 36.03, + "MassiveIntentClassification (th)": 43.39, + "MassiveIntentClassification (tl)": 29.73, + "MassiveIntentClassification (tr)": 43.93, + "MassiveIntentClassification (ur)": 26.11, + "MassiveIntentClassification (vi)": 44.33, + "MassiveIntentClassification (zh-CN)": 40.62, + "MassiveIntentClassification (zh-TW)": 32.93, + "MassiveScenarioClassification (af)": 47.1, + "MassiveScenarioClassification (am)": 17.7, + "MassiveScenarioClassification (ar)": 45.21, + "MassiveScenarioClassification (az)": 28.21, + "MassiveScenarioClassification (bn)": 50.52, + "MassiveScenarioClassification (cy)": 22.58, + "MassiveScenarioClassification (da)": 54.87, + "MassiveScenarioClassification (de)": 54.34, + "MassiveScenarioClassification (el)": 55.47, + "MassiveScenarioClassification (en)": 55.92, + "MassiveScenarioClassification (es)": 52.77, + "MassiveScenarioClassification (fa)": 52.5, + "MassiveScenarioClassification (fi)": 52.63, + "MassiveScenarioClassification (fr)": 54.32, + "MassiveScenarioClassification (he)": 52.41, + "MassiveScenarioClassification (hi)": 47.37, + "MassiveScenarioClassification (hu)": 53.43, + "MassiveScenarioClassification (hy)": 33.57, + "MassiveScenarioClassification (id)": 54.38, + "MassiveScenarioClassification (is)": 49.78, + "MassiveScenarioClassification (it)": 54.84, + "MassiveScenarioClassification (ja)": 54.12, + "MassiveScenarioClassification (jv)": 32.71, + "MassiveScenarioClassification (ka)": 26.92, + "MassiveScenarioClassification (km)": 27.23, + "MassiveScenarioClassification (kn)": 10.06, + "MassiveScenarioClassification (ko)": 52.01, + "MassiveScenarioClassification (lv)": 44.82, + "MassiveScenarioClassification (ml)": 49.1, + "MassiveScenarioClassification (mn)": 21.51, + "MassiveScenarioClassification (ms)": 53.6, + "MassiveScenarioClassification (my)": 29.72, + "MassiveScenarioClassification (nb)": 43.9, + "MassiveScenarioClassification (nl)": 53.33, + "MassiveScenarioClassification (pl)": 52.92, + "MassiveScenarioClassification (pt)": 53.41, + "MassiveScenarioClassification (ro)": 50.48, + "MassiveScenarioClassification (ru)": 51.84, + "MassiveScenarioClassification (sl)": 51.29, + "MassiveScenarioClassification (sq)": 55.65, + "MassiveScenarioClassification (sv)": 54.64, + "MassiveScenarioClassification (sw)": 42.04, + "MassiveScenarioClassification (ta)": 36.72, + "MassiveScenarioClassification (te)": 42.08, + "MassiveScenarioClassification (th)": 52.15, + "MassiveScenarioClassification (tl)": 37.34, + "MassiveScenarioClassification (tr)": 52.56, + "MassiveScenarioClassification (ur)": 32.6, + "MassiveScenarioClassification (vi)": 50.97, + "MassiveScenarioClassification (zh-CN)": 50.22, + "MassiveScenarioClassification (zh-TW)": 42.32, "ToxicConversationsClassification": 54.05, "TweetSentimentExtractionClassification": 48.73 } @@ -1279,8 +1515,8 @@ "HALClusteringS2S": 11.52, "MLSUMClusteringP2P": 34.53, "MLSUMClusteringS2S": 27.35, - "MasakhaNEWSClusteringP2P": 32.04, - "MasakhaNEWSClusteringS2S": 29.77, + "MasakhaNEWSClusteringP2P (fra)": 32.04, + "MasakhaNEWSClusteringS2S (fra)": 29.77, "MedrxivClusteringP2P": 17.91, "MedrxivClusteringS2S": 16.63, "RedditClustering": 9.96, @@ -1295,16 +1531,16 @@ "max_ap": [ { "Model": "LASER2", - "OpusparcusPC": 93.77, - "PawsXPairClassification": 69.53, + "OpusparcusPC (fr)": 93.77, + "PawsXPairClassification (fr)": 69.53, "SprintDuplicateQuestions": 65.54, "TwitterSemEval2015": 59.57, "TwitterURLCorpus": 81.47 }, { "Model": "LASER2", - "OpusparcusPC": 93.77, - "PawsXPairClassification": 70.31, + "OpusparcusPC (fr)": 93.77, + "PawsXPairClassification (fr)": 70.31, "SprintDuplicateQuestions": 68.48, "TwitterSemEval2015": 59.57, "TwitterURLCorpus": 81.47 @@ -1338,7 +1574,7 @@ "FiQA2018": 1.73, "HotpotQA": 5.5, "MSMARCO": 1.09, - "MintakaRetrieval": 6.31, + "MintakaRetrieval (fr)": 6.31, "NFCorpus": 2.44, "NQ": 0.64, "QuoraRetrieval": 71.14, @@ -1347,7 +1583,7 @@ "SyntecRetrieval": 28.58, "TRECCOVID": 10.97, "Touche2020": 1.06, - "XPQARetrieval": 42.59 + "XPQARetrieval (fr)": 42.59 } ] }, @@ -1363,10 +1599,37 @@ "STS14": 57.03, "STS15": 71.57, "STS16": 70.75, - "STS17": 68.12, - "STS22": 46.19, + "STS17 (ar-ar)": 67.47, + "STS17 (en-ar)": 65.05, + "STS17 (en-de)": 66.66, + "STS17 (en-en)": 76.73, + "STS17 (en-tr)": 70.05, + "STS17 (es-en)": 55.3, + "STS17 (es-es)": 79.67, + "STS17 (fr-en)": 70.82, + "STS17 (it-en)": 70.98, + "STS17 (ko-ko)": 70.52, + "STS17 (nl-en)": 68.12, + "STS22 (ar)": 42.57, + "STS22 (de)": 25.69, + "STS22 (de-en)": 32.35, + "STS22 (de-fr)": 37.41, + "STS22 (de-pl)": 15.67, + "STS22 (en)": 39.76, + "STS22 (es)": 54.92, + "STS22 (es-en)": 54.34, + "STS22 (es-it)": 42.21, + "STS22 (fr)": 58.61, + "STS22 (fr-pl)": 39.44, + "STS22 (it)": 60.31, + "STS22 (pl)": 18.34, + "STS22 (pl-en)": 53.63, + "STS22 (ru)": 39.24, + "STS22 (tr)": 36.97, + "STS22 (zh)": 49.41, + "STS22 (zh-en)": 46.19, "STSBenchmark": 69.77, - "STSBenchmarkMultilingualSTS": 69.82 + "STSBenchmarkMultilingualSTS (fr)": 69.82 } ] }, @@ -1394,16 +1657,16 @@ "accuracy": [ { "Model": "LLM2Vec-Llama-2-supervised", - "AmazonCounterfactualClassification": 82.22, + "AmazonCounterfactualClassification (en)": 82.22, "AmazonPolarityClassification": 89.69, - "AmazonReviewsClassification": 48.47, + "AmazonReviewsClassification (en)": 48.47, "Banking77Classification": 88.17, "EmotionClassification": 51.71, "ImdbClassification": 85.78, - "MTOPDomainClassification": 95.57, - "MTOPIntentClassification": 82.81, - "MassiveIntentClassification": 78.06, - "MassiveScenarioClassification": 81.35, + "MTOPDomainClassification (en)": 95.57, + "MTOPIntentClassification (en)": 82.81, + "MassiveIntentClassification (en)": 78.06, + "MassiveScenarioClassification (en)": 81.35, "ToxicConversationsClassification": 71.01, "TweetSentimentExtractionClassification": 61.11 } @@ -1487,8 +1750,8 @@ "STS14": 84.04, "STS15": 88.72, "STS16": 86.79, - "STS17": 90.63, - "STS22": 67.55, + "STS17 (en-en)": 90.63, + "STS22 (en)": 67.55, "STSBenchmark": 88.72 } ] @@ -1516,16 +1779,16 @@ "accuracy": [ { "Model": "LLM2Vec-Llama-2-unsupervised", - "AmazonCounterfactualClassification": 76.91, + "AmazonCounterfactualClassification (en)": 76.91, "AmazonPolarityClassification": 79.05, - "AmazonReviewsClassification": 40.08, + "AmazonReviewsClassification (en)": 40.08, "Banking77Classification": 84.65, "EmotionClassification": 46.58, "ImdbClassification": 75.68, - "MTOPDomainClassification": 94.33, - "MTOPIntentClassification": 79.54, - "MassiveIntentClassification": 73.84, - "MassiveScenarioClassification": 79.17, + "MTOPDomainClassification (en)": 94.33, + "MTOPIntentClassification (en)": 79.54, + "MassiveIntentClassification (en)": 73.84, + "MassiveScenarioClassification (en)": 79.17, "ToxicConversationsClassification": 71.81, "TweetSentimentExtractionClassification": 57.17 } @@ -1609,8 +1872,8 @@ "STS14": 72.98, "STS15": 82.72, "STS16": 81.02, - "STS17": 86.7, - "STS22": 63.47, + "STS17 (en-en)": 86.7, + "STS22 (en)": 63.47, "STSBenchmark": 78.32 } ] @@ -1638,16 +1901,16 @@ "accuracy": [ { "Model": "LLM2Vec-Meta-Llama-3-supervised", - "AmazonCounterfactualClassification": 79.94, + "AmazonCounterfactualClassification (en)": 79.94, "AmazonPolarityClassification": 86.07, - "AmazonReviewsClassification": 46.84, + "AmazonReviewsClassification (en)": 46.84, "Banking77Classification": 88.05, "EmotionClassification": 51.2, "ImdbClassification": 82.94, - "MTOPDomainClassification": 96.14, - "MTOPIntentClassification": 86.11, - "MassiveIntentClassification": 79.8, - "MassiveScenarioClassification": 81.52, + "MTOPDomainClassification (en)": 96.14, + "MTOPIntentClassification (en)": 86.11, + "MassiveIntentClassification (en)": 79.8, + "MassiveScenarioClassification (en)": 81.52, "ToxicConversationsClassification": 70.59, "TweetSentimentExtractionClassification": 61.9 } @@ -1731,8 +1994,8 @@ "STS14": 82.94, "STS15": 88.09, "STS16": 86.54, - "STS17": 89.58, - "STS22": 67.67, + "STS17 (en-en)": 89.58, + "STS22 (en)": 67.67, "STSBenchmark": 88.05 } ] @@ -1760,16 +2023,16 @@ "accuracy": [ { "Model": "LLM2Vec-Meta-Llama-3-unsupervised", - "AmazonCounterfactualClassification": 75.7, + "AmazonCounterfactualClassification (en)": 75.7, "AmazonPolarityClassification": 80.68, - "AmazonReviewsClassification": 40.0, + "AmazonReviewsClassification (en)": 40.0, "Banking77Classification": 84.77, "EmotionClassification": 47.08, "ImdbClassification": 75.19, - "MTOPDomainClassification": 94.47, - "MTOPIntentClassification": 81.09, - "MassiveIntentClassification": 75.01, - "MassiveScenarioClassification": 79.16, + "MTOPDomainClassification (en)": 94.47, + "MTOPIntentClassification (en)": 81.09, + "MassiveIntentClassification (en)": 75.01, + "MassiveScenarioClassification (en)": 79.16, "ToxicConversationsClassification": 71.85, "TweetSentimentExtractionClassification": 57.61 } @@ -1853,8 +2116,8 @@ "STS14": 72.11, "STS15": 82.18, "STS16": 79.41, - "STS17": 85.44, - "STS22": 63.9, + "STS17 (en-en)": 85.44, + "STS22 (en)": 63.9, "STSBenchmark": 77.44 } ] @@ -1882,16 +2145,16 @@ "accuracy": [ { "Model": "LLM2Vec-Mistral-supervised", - "AmazonCounterfactualClassification": 77.58, + "AmazonCounterfactualClassification (en)": 77.58, "AmazonPolarityClassification": 91.12, - "AmazonReviewsClassification": 49.97, + "AmazonReviewsClassification (en)": 49.97, "Banking77Classification": 88.31, "EmotionClassification": 52.04, "ImdbClassification": 87.42, - "MTOPDomainClassification": 96.04, - "MTOPIntentClassification": 84.77, - "MassiveIntentClassification": 79.29, - "MassiveScenarioClassification": 81.64, + "MTOPDomainClassification (en)": 96.04, + "MTOPIntentClassification (en)": 84.77, + "MassiveIntentClassification (en)": 79.29, + "MassiveScenarioClassification (en)": 81.64, "ToxicConversationsClassification": 69.26, "TweetSentimentExtractionClassification": 62.14 } @@ -1975,8 +2238,8 @@ "STS14": 84.04, "STS15": 88.99, "STS16": 87.22, - "STS17": 90.19, - "STS22": 67.68, + "STS17 (en-en)": 90.19, + "STS22 (en)": 67.68, "STSBenchmark": 88.65 } ] @@ -2004,16 +2267,16 @@ "accuracy": [ { "Model": "LLM2Vec-Mistral-unsupervised", - "AmazonCounterfactualClassification": 76.94, + "AmazonCounterfactualClassification (en)": 76.94, "AmazonPolarityClassification": 85.29, - "AmazonReviewsClassification": 47.09, + "AmazonReviewsClassification (en)": 47.09, "Banking77Classification": 86.16, "EmotionClassification": 48.88, "ImdbClassification": 77.95, - "MTOPDomainClassification": 95.48, - "MTOPIntentClassification": 82.84, - "MassiveIntentClassification": 76.65, - "MassiveScenarioClassification": 79.99, + "MTOPDomainClassification (en)": 95.48, + "MTOPIntentClassification (en)": 82.84, + "MassiveIntentClassification (en)": 76.65, + "MassiveScenarioClassification (en)": 79.99, "ToxicConversationsClassification": 70.71, "TweetSentimentExtractionClassification": 60.9 } @@ -2097,8 +2360,8 @@ "STS14": 76.97, "STS15": 83.8, "STS16": 81.91, - "STS17": 85.58, - "STS22": 65.93, + "STS17 (en-en)": 85.58, + "STS22 (en)": 65.93, "STSBenchmark": 80.42 } ] @@ -2126,16 +2389,16 @@ "accuracy": [ { "Model": "LLM2Vec-Sheared-Llama-supervised", - "AmazonCounterfactualClassification": 77.42, + "AmazonCounterfactualClassification (en)": 77.42, "AmazonPolarityClassification": 82.05, - "AmazonReviewsClassification": 40.81, + "AmazonReviewsClassification (en)": 40.81, "Banking77Classification": 86.01, "EmotionClassification": 48.38, "ImdbClassification": 75.33, - "MTOPDomainClassification": 94.09, - "MTOPIntentClassification": 77.05, - "MassiveIntentClassification": 75.58, - "MassiveScenarioClassification": 79.16, + "MTOPDomainClassification (en)": 94.09, + "MTOPIntentClassification (en)": 77.05, + "MassiveIntentClassification (en)": 75.58, + "MassiveScenarioClassification (en)": 79.16, "ToxicConversationsClassification": 69.92, "TweetSentimentExtractionClassification": 60.76 } @@ -2219,8 +2482,8 @@ "STS14": 82.49, "STS15": 88.76, "STS16": 87.11, - "STS17": 90.1, - "STS22": 68.25, + "STS17 (en-en)": 90.1, + "STS22 (en)": 68.25, "STSBenchmark": 87.16 } ] @@ -2248,16 +2511,16 @@ "accuracy": [ { "Model": "LLM2Vec-Sheared-Llama-unsupervised", - "AmazonCounterfactualClassification": 72.93, + "AmazonCounterfactualClassification (en)": 72.93, "AmazonPolarityClassification": 74.28, - "AmazonReviewsClassification": 36.14, + "AmazonReviewsClassification (en)": 36.14, "Banking77Classification": 79.0, "EmotionClassification": 42.85, "ImdbClassification": 71.92, - "MTOPDomainClassification": 91.24, - "MTOPIntentClassification": 74.08, - "MassiveIntentClassification": 69.99, - "MassiveScenarioClassification": 75.15, + "MTOPDomainClassification (en)": 91.24, + "MTOPIntentClassification (en)": 74.08, + "MassiveIntentClassification (en)": 69.99, + "MassiveScenarioClassification (en)": 75.15, "ToxicConversationsClassification": 68.4, "TweetSentimentExtractionClassification": 56.08 } @@ -2341,8 +2604,8 @@ "STS14": 66.7, "STS15": 77.69, "STS16": 75.94, - "STS17": 81.67, - "STS22": 63.7, + "STS17 (en-en)": 81.67, + "STS22 (en)": 63.7, "STSBenchmark": 73.36 } ] @@ -2367,7 +2630,10 @@ "f1": [ { "Model": "LaBSE", - "BUCC": 99.16, + "BUCC (de-en)": 99.35, + "BUCC (fr-en)": 98.72, + "BUCC (ru-en)": 97.78, + "BUCC (zh-en)": 99.16, "BornholmBitextMining": 45.63, "Tatoeba (ber-eng)": 8.4, "Tatoeba (hye-eng)": 94.09, @@ -2480,8 +2746,7 @@ "Tatoeba (lvs-eng)": 95.88, "Tatoeba (max-eng)": 63.26, "Tatoeba (lit-eng)": 96.47, - "Tatoeba (glg-eng)": 96.82, - "Tatoeba": 95.62 + "Tatoeba (glg-eng)": 96.82 } ] }, @@ -2490,19 +2755,17 @@ { "Model": "LaBSE", "AllegroReviews": 34.89, - "AmazonCounterfactualClassification (en-ext)": 76.12, - "AmazonCounterfactualClassification (en)": 75.9, + "AmazonCounterfactualClassification (en-ext)": 76.09, + "AmazonCounterfactualClassification (en)": 75.93, "AmazonCounterfactualClassification (de)": 73.17, - "AmazonCounterfactualClassification (ja)": 76.4, - "AmazonCounterfactualClassification": 76.42, + "AmazonCounterfactualClassification (ja)": 76.42, "AmazonPolarityClassification": 68.95, - "AmazonReviewsClassification (en)": 35.81, - "AmazonReviewsClassification (de)": 39.93, + "AmazonReviewsClassification (en)": 35.8, + "AmazonReviewsClassification (de)": 39.92, "AmazonReviewsClassification (es)": 39.39, - "AmazonReviewsClassification (fr)": 38.53, - "AmazonReviewsClassification (ja)": 36.45, + "AmazonReviewsClassification (fr)": 38.52, + "AmazonReviewsClassification (ja)": 36.44, "AmazonReviewsClassification (zh)": 36.45, - "AmazonReviewsClassification": 36.45, "AngryTweetsClassification": 51.11, "Banking77Classification": 69.85, "CBD": 65.71, @@ -2516,23 +2779,21 @@ "JDReview": 79.14, "KinopoiskClassification": 46.77, "LccSentimentClassification": 50.07, - "MTOPDomainClassification (en)": 86.08, - "MTOPDomainClassification (de)": 86.93, - "MTOPDomainClassification (es)": 84.06, + "MTOPDomainClassification (en)": 86.06, + "MTOPDomainClassification (de)": 86.95, + "MTOPDomainClassification (es)": 84.07, "MTOPDomainClassification (fr)": 84.14, "MTOPDomainClassification (hi)": 85.11, "MTOPDomainClassification (th)": 81.24, - "MTOPDomainClassification": 81.24, - "MTOPIntentClassification (en)": 63.07, - "MTOPIntentClassification (de)": 63.46, - "MTOPIntentClassification (es)": 64.46, - "MTOPIntentClassification (fr)": 62.05, - "MTOPIntentClassification (hi)": 62.61, - "MTOPIntentClassification (th)": 64.7, - "MTOPIntentClassification": 64.61, + "MTOPIntentClassification (en)": 63.03, + "MTOPIntentClassification (de)": 63.42, + "MTOPIntentClassification (es)": 64.44, + "MTOPIntentClassification (fr)": 62.01, + "MTOPIntentClassification (hi)": 62.58, + "MTOPIntentClassification (th)": 64.61, "MasakhaNEWSClassification (amh)": 81.78, "MasakhaNEWSClassification (eng)": 77.77, - "MasakhaNEWSClassification (fra)": 72.09, + "MasakhaNEWSClassification (fra)": 77.39, "MasakhaNEWSClassification (hau)": 73.12, "MasakhaNEWSClassification (ibo)": 69.1, "MasakhaNEWSClassification (lin)": 74.63, @@ -2546,111 +2807,108 @@ "MasakhaNEWSClassification (tir)": 61.73, "MasakhaNEWSClassification (xho)": 77.34, "MasakhaNEWSClassification (yor)": 77.13, - "MasakhaNEWSClassification": 77.39, - "MassiveIntentClassification (ms)": 58.59, - "MassiveIntentClassification (sl)": 59.39, + "MassiveIntentClassification (ms)": 58.6, + "MassiveIntentClassification (sl)": 59.37, "MassiveIntentClassification (hu)": 59.52, - "MassiveIntentClassification (sv)": 59.64, - "MassiveIntentClassification (nl)": 59.31, - "MassiveIntentClassification (id)": 61.14, - "MassiveIntentClassification (vi)": 56.68, - "MassiveIntentClassification (zh-CN)": 63.85, - "MassiveIntentClassification (am)": 55.67, - "MassiveIntentClassification (ko)": 60.93, - "MassiveIntentClassification (el)": 56.96, - "MassiveIntentClassification (nb)": 57.81, - "MassiveIntentClassification (jv)": 50.94, + "MassiveIntentClassification (sv)": 59.66, + "MassiveIntentClassification (nl)": 59.37, + "MassiveIntentClassification (id)": 61.12, + "MassiveIntentClassification (vi)": 56.67, + "MassiveIntentClassification (zh-CN)": 63.86, + "MassiveIntentClassification (am)": 55.71, + "MassiveIntentClassification (ko)": 60.99, + "MassiveIntentClassification (el)": 57.03, + "MassiveIntentClassification (nb)": 57.91, + "MassiveIntentClassification (jv)": 50.98, "MassiveIntentClassification (fa)": 62.33, - "MassiveIntentClassification (ja)": 63.13, - "MassiveIntentClassification (it)": 59.75, - "MassiveIntentClassification (es)": 58.26, - "MassiveIntentClassification (my)": 57.23, - "MassiveIntentClassification (fi)": 60.09, - "MassiveIntentClassification (he)": 56.42, - "MassiveIntentClassification (is)": 54.83, - "MassiveIntentClassification (tl)": 55.1, - "MassiveIntentClassification (kn)": 56.2, - "MassiveIntentClassification (ur)": 56.68, - "MassiveIntentClassification (ml)": 57.75, + "MassiveIntentClassification (ja)": 63.11, + "MassiveIntentClassification (it)": 59.83, + "MassiveIntentClassification (es)": 58.32, + "MassiveIntentClassification (my)": 57.35, + "MassiveIntentClassification (fi)": 60.12, + "MassiveIntentClassification (he)": 56.55, + "MassiveIntentClassification (is)": 54.9, + "MassiveIntentClassification (tl)": 55.28, + "MassiveIntentClassification (kn)": 56.24, + "MassiveIntentClassification (ur)": 56.7, + "MassiveIntentClassification (ml)": 57.91, "MassiveIntentClassification (tr)": 60.91, - "MassiveIntentClassification (en)": 61.44, - "MassiveIntentClassification (sw)": 51.5, - "MassiveIntentClassification (km)": 48.46, - "MassiveIntentClassification (ar)": 50.78, - "MassiveIntentClassification (ru)": 60.64, - "MassiveIntentClassification (te)": 58.33, - "MassiveIntentClassification (ta)": 54.95, - "MassiveIntentClassification (pt)": 60.15, - "MassiveIntentClassification (zh-TW)": 59.47, - "MassiveIntentClassification (pl)": 59.75, - "MassiveIntentClassification (az)": 58.91, - "MassiveIntentClassification (ro)": 57.84, - "MassiveIntentClassification (ka)": 48.26, - "MassiveIntentClassification (sq)": 58.08, - "MassiveIntentClassification (bn)": 58.14, - "MassiveIntentClassification (th)": 56.45, - "MassiveIntentClassification (da)": 58.23, - "MassiveIntentClassification (de)": 56.16, - "MassiveIntentClassification (hy)": 56.22, - "MassiveIntentClassification (mn)": 58.46, - "MassiveIntentClassification (cy)": 50.17, - "MassiveIntentClassification (af)": 56.07, - "MassiveIntentClassification (fr)": 60.42, - "MassiveIntentClassification (hi)": 59.37, - "MassiveIntentClassification (lv)": 57.06, - "MassiveIntentClassification": 59.51, - "MassiveScenarioClassification (te)": 64.12, - "MassiveScenarioClassification (de)": 62.4, - "MassiveScenarioClassification (el)": 64.57, - "MassiveScenarioClassification (ml)": 62.24, - "MassiveScenarioClassification (nl)": 65.17, + "MassiveIntentClassification (en)": 61.46, + "MassiveIntentClassification (sw)": 51.62, + "MassiveIntentClassification (km)": 48.55, + "MassiveIntentClassification (ar)": 50.86, + "MassiveIntentClassification (ru)": 60.67, + "MassiveIntentClassification (te)": 58.32, + "MassiveIntentClassification (ta)": 55.04, + "MassiveIntentClassification (pt)": 60.16, + "MassiveIntentClassification (zh-TW)": 59.51, + "MassiveIntentClassification (pl)": 59.71, + "MassiveIntentClassification (az)": 58.97, + "MassiveIntentClassification (ro)": 57.92, + "MassiveIntentClassification (ka)": 48.35, + "MassiveIntentClassification (sq)": 58.03, + "MassiveIntentClassification (bn)": 58.22, + "MassiveIntentClassification (th)": 56.58, + "MassiveIntentClassification (da)": 58.25, + "MassiveIntentClassification (de)": 56.21, + "MassiveIntentClassification (hy)": 56.2, + "MassiveIntentClassification (mn)": 58.5, + "MassiveIntentClassification (cy)": 50.16, + "MassiveIntentClassification (af)": 56.12, + "MassiveIntentClassification (fr)": 60.47, + "MassiveIntentClassification (hi)": 59.4, + "MassiveIntentClassification (lv)": 57.1, + "MassiveScenarioClassification (te)": 64.13, + "MassiveScenarioClassification (de)": 62.39, + "MassiveScenarioClassification (el)": 64.58, + "MassiveScenarioClassification (ml)": 62.26, + "MassiveScenarioClassification (nl)": 65.16, "MassiveScenarioClassification (jv)": 58.29, - "MassiveScenarioClassification (ro)": 62.39, + "MassiveScenarioClassification (ro)": 62.41, "MassiveScenarioClassification (pt)": 63.28, - "MassiveScenarioClassification (pl)": 64.55, - "MassiveScenarioClassification (sl)": 64.26, - "MassiveScenarioClassification (ta)": 59.07, - "MassiveScenarioClassification (sw)": 58.37, + "MassiveScenarioClassification (pl)": 64.58, + "MassiveScenarioClassification (sl)": 64.25, + "MassiveScenarioClassification (ta)": 59.08, + "MassiveScenarioClassification (sw)": 58.36, "MassiveScenarioClassification (tl)": 60.23, - "MassiveScenarioClassification (id)": 65.86, - "MassiveScenarioClassification (ru)": 65.23, - "MassiveScenarioClassification (sv)": 65.99, - "MassiveScenarioClassification (ar)": 57.74, + "MassiveScenarioClassification (id)": 65.84, + "MassiveScenarioClassification (ru)": 65.25, + "MassiveScenarioClassification (sv)": 66.01, + "MassiveScenarioClassification (ar)": 57.72, "MassiveScenarioClassification (fr)": 65.1, - "MassiveScenarioClassification (th)": 64.32, - "MassiveScenarioClassification (hy)": 61.29, - "MassiveScenarioClassification (nb)": 64.3, - "MassiveScenarioClassification (cy)": 56.12, + "MassiveScenarioClassification (th)": 64.34, + "MassiveScenarioClassification (hy)": 61.25, + "MassiveScenarioClassification (nb)": 64.29, + "MassiveScenarioClassification (cy)": 56.13, "MassiveScenarioClassification (my)": 62.94, - "MassiveScenarioClassification (bn)": 61.86, + "MassiveScenarioClassification (bn)": 61.84, "MassiveScenarioClassification (it)": 64.09, - "MassiveScenarioClassification (ko)": 67.3, + "MassiveScenarioClassification (ko)": 67.26, "MassiveScenarioClassification (kn)": 61.74, - "MassiveScenarioClassification (he)": 63.52, + "MassiveScenarioClassification (he)": 63.53, "MassiveScenarioClassification (am)": 62.02, - "MassiveScenarioClassification (ka)": 53.37, - "MassiveScenarioClassification (vi)": 61.06, - "MassiveScenarioClassification (ur)": 61.47, - "MassiveScenarioClassification (zh-TW)": 67.05, - "MassiveScenarioClassification (en)": 66.44, - "MassiveScenarioClassification (ms)": 65.62, - "MassiveScenarioClassification (is)": 61.93, - "MassiveScenarioClassification (km)": 56.2, + "MassiveScenarioClassification (ka)": 53.38, + "MassiveScenarioClassification (vi)": 61.05, + "MassiveScenarioClassification (ur)": 61.52, + "MassiveScenarioClassification (zh-TW)": 67.08, + "MassiveScenarioClassification (en)": 66.41, + "MassiveScenarioClassification (ms)": 65.63, + "MassiveScenarioClassification (is)": 61.94, + "MassiveScenarioClassification (km)": 56.18, "MassiveScenarioClassification (hu)": 65.82, - "MassiveScenarioClassification (fi)": 64.56, - "MassiveScenarioClassification (sq)": 64.56, - "MassiveScenarioClassification (fa)": 67.43, - "MassiveScenarioClassification (af)": 63.38, - "MassiveScenarioClassification (mn)": 62.62, - "MassiveScenarioClassification (da)": 65.26, - "MassiveScenarioClassification (az)": 63.47, + "MassiveScenarioClassification (fi)": 64.58, + "MassiveScenarioClassification (sq)": 64.54, + "MassiveScenarioClassification (fa)": 67.46, + "MassiveScenarioClassification (af)": 63.39, + "MassiveScenarioClassification (mn)": 62.6, + "MassiveScenarioClassification (da)": 65.24, + "MassiveScenarioClassification (az)": 63.48, "MassiveScenarioClassification (es)": 63.61, - "MassiveScenarioClassification (ja)": 67.7, + "MassiveScenarioClassification (ja)": 67.72, "MassiveScenarioClassification (tr)": 65.43, - "MassiveScenarioClassification (lv)": 61.86, - "MassiveScenarioClassification (zh-CN)": 70.84, - "MassiveScenarioClassification (hi)": 64.41, - "MassiveScenarioClassification": 67.08, + "MassiveScenarioClassification (lv)": 61.87, + "MassiveScenarioClassification (zh-CN)": 70.85, + "MassiveScenarioClassification (hi)": 64.4, "MultilingualSentiment": 64.6, "NoRecClassification": 45.45, "NordicLangClassification": 35.39, @@ -2701,7 +2959,6 @@ "MasakhaNEWSClusteringP2P (tir)": 51.59, "MasakhaNEWSClusteringP2P (xho)": 45.32, "MasakhaNEWSClusteringP2P (yor)": 48.73, - "MasakhaNEWSClusteringP2P": 46.16, "MasakhaNEWSClusteringS2S (amh)": 52.73, "MasakhaNEWSClusteringS2S (eng)": 32.6, "MasakhaNEWSClusteringS2S (fra)": 38.13, @@ -2718,7 +2975,6 @@ "MasakhaNEWSClusteringS2S (tir)": 49.07, "MasakhaNEWSClusteringS2S (xho)": 28.52, "MasakhaNEWSClusteringS2S (yor)": 32.26, - "MasakhaNEWSClusteringS2S": 38.13, "MedrxivClusteringP2P": 30.13, "MedrxivClusteringS2S": 24.82, "RedditClustering": 28.79, @@ -2765,17 +3021,15 @@ "OpusparcusPC (fr)": 93.96, "OpusparcusPC (ru)": 87.3, "OpusparcusPC (sv)": 93.69, - "OpusparcusPC": 93.96, "PPC": 86.97, "PSC": 97.42, "PawsXPairClassification (de)": 51.45, "PawsXPairClassification (en)": 54.07, "PawsXPairClassification (es)": 52.19, - "PawsXPairClassification (fr)": 54.7, + "PawsXPairClassification (fr)": 54.69, "PawsXPairClassification (ja)": 47.74, "PawsXPairClassification (ko)": 49.42, "PawsXPairClassification (zh)": 54.55, - "PawsXPairClassification": 54.69, "SICK-E-PL": 63.77, "SprintDuplicateQuestions": 89.26, "TERRa": 55.71, @@ -2785,10 +3039,10 @@ { "Model": "LaBSE", "CDSC-E": 68.91, - "OpusparcusPC": 93.96, + "OpusparcusPC (fr)": 93.96, "PPC": 86.97, "PSC": 97.42, - "PawsXPairClassification": 54.63, + "PawsXPairClassification (fr)": 54.63, "SICK-E-PL": 63.77, "SprintDuplicateQuestions": 89.26, "TwitterSemEval2015": 62.78, @@ -2878,7 +3132,6 @@ "MintakaRetrieval (it)": 15.94, "MintakaRetrieval (ja)": 12.8, "MintakaRetrieval (pt)": 15.03, - "MintakaRetrieval": 15.53, "NFCorpus": 16.54, "NFCorpus-PL": 17.45, "NQ": 8.42, @@ -2947,7 +3200,7 @@ "XPQARetrieval (cmn-cmn)": 50.7, "XPQARetrieval (eng-cmn)": 20.59, "XPQARetrieval (cmn-eng)": 48.23, - "XPQARetrieval": 51.74 + "XPQARetrieval (fr)": 51.74 } ] }, @@ -3026,10 +3279,37 @@ "STS14": 64.03, "STS15": 76.59, "STS16": 72.98, - "STS17": 75.22, - "STS22": 64.02, + "STS17 (ar-ar)": 69.07, + "STS17 (en-ar)": 74.51, + "STS17 (en-de)": 73.85, + "STS17 (en-en)": 79.45, + "STS17 (en-tr)": 72.07, + "STS17 (es-en)": 65.71, + "STS17 (es-es)": 80.83, + "STS17 (fr-en)": 76.98, + "STS17 (it-en)": 76.99, + "STS17 (ko-ko)": 71.32, + "STS17 (nl-en)": 75.22, + "STS22 (ar)": 57.67, + "STS22 (de)": 48.58, + "STS22 (de-en)": 50.14, + "STS22 (de-fr)": 53.28, + "STS22 (de-pl)": 58.69, + "STS22 (en)": 60.97, + "STS22 (es)": 63.18, + "STS22 (es-en)": 71.86, + "STS22 (es-it)": 69.69, + "STS22 (fr)": 77.95, + "STS22 (fr-pl)": 61.98, + "STS22 (it)": 72.22, + "STS22 (pl)": 39.28, + "STS22 (pl-en)": 69.41, + "STS22 (ru)": 57.49, + "STS22 (tr)": 58.15, + "STS22 (zh)": 63.02, + "STS22 (zh-en)": 64.02, "STSBenchmark": 72.25, - "STSBenchmarkMultilingualSTS": 75.1 + "STSBenchmarkMultilingualSTS (fr)": 75.1 } ] }, @@ -3397,11 +3677,11 @@ "accuracy": [ { "Model": "OpenSearch-text-hybrid", - "AmazonReviewsClassification": 46.18, + "AmazonReviewsClassification (zh)": 46.18, "IFlyTek": 51.8, "JDReview": 86.02, - "MassiveIntentClassification": 73.85, - "MassiveScenarioClassification": 77.13, + "MassiveIntentClassification (zh-CN)": 73.85, + "MassiveScenarioClassification (zh-CN)": 77.13, "MultilingualSentiment": 76.35, "OnlineShopping": 93.2, "TNews": 53.06, @@ -3470,7 +3750,7 @@ "LCQMC": 78.27, "PAWSX": 44.98, "QBQTC": 38.69, - "STS22": 66.53, + "STS22 (zh)": 66.53, "STSB": 82.8 } ] @@ -3879,17 +4159,15 @@ "AllegroReviews": 23.85, "AmazonCounterfactualClassification (en-ext)": 67.24, "AmazonCounterfactualClassification (en)": 65.28, - "AmazonCounterfactualClassification (de)": 57.13, - "AmazonCounterfactualClassification (ja)": 59.94, - "AmazonCounterfactualClassification": 59.91, + "AmazonCounterfactualClassification (de)": 57.1, + "AmazonCounterfactualClassification (ja)": 59.91, "AmazonPolarityClassification": 62.98, "AmazonReviewsClassification (en)": 30.79, - "AmazonReviewsClassification (de)": 25.92, - "AmazonReviewsClassification (es)": 27.64, - "AmazonReviewsClassification (fr)": 27.53, + "AmazonReviewsClassification (de)": 25.91, + "AmazonReviewsClassification (es)": 27.63, + "AmazonReviewsClassification (fr)": 27.54, "AmazonReviewsClassification (ja)": 23.57, "AmazonReviewsClassification (zh)": 22.99, - "AmazonReviewsClassification": 22.99, "AngryTweetsClassification": 42.87, "Banking77Classification": 80.4, "CBD": 48.46, @@ -3903,23 +4181,21 @@ "JDReview": 59.57, "KinopoiskClassification": 34.17, "LccSentimentClassification": 41.93, - "MTOPDomainClassification (en)": 91.88, + "MTOPDomainClassification (en)": 91.9, "MTOPDomainClassification (de)": 72.04, "MTOPDomainClassification (es)": 72.99, - "MTOPDomainClassification (fr)": 75.57, - "MTOPDomainClassification (hi)": 40.4, + "MTOPDomainClassification (fr)": 75.59, + "MTOPDomainClassification (hi)": 40.36, "MTOPDomainClassification (th)": 17.1, - "MTOPDomainClassification": 17.1, - "MTOPIntentClassification (en)": 62.83, - "MTOPIntentClassification (de)": 43.42, - "MTOPIntentClassification (es)": 41.91, - "MTOPIntentClassification (fr)": 38.96, - "MTOPIntentClassification (hi)": 17.76, - "MTOPIntentClassification (th)": 5.9, - "MTOPIntentClassification": 5.63, + "MTOPIntentClassification (en)": 62.84, + "MTOPIntentClassification (de)": 43.41, + "MTOPIntentClassification (es)": 41.88, + "MTOPIntentClassification (fr)": 38.94, + "MTOPIntentClassification (hi)": 17.75, + "MTOPIntentClassification (th)": 5.63, "MasakhaNEWSClassification (amh)": 30.64, "MasakhaNEWSClassification (eng)": 76.62, - "MasakhaNEWSClassification (fra)": 67.18, + "MasakhaNEWSClassification (fra)": 72.2, "MasakhaNEWSClassification (hau)": 52.59, "MasakhaNEWSClassification (ibo)": 54.26, "MasakhaNEWSClassification (lin)": 62.23, @@ -3933,111 +4209,108 @@ "MasakhaNEWSClassification (tir)": 21.18, "MasakhaNEWSClassification (xho)": 54.34, "MasakhaNEWSClassification (yor)": 58.61, - "MasakhaNEWSClassification": 72.2, - "MassiveIntentClassification (ja)": 30.89, + "MassiveIntentClassification (ja)": 30.94, "MassiveIntentClassification (zh-CN)": 23.74, - "MassiveIntentClassification (en)": 67.11, - "MassiveIntentClassification (nb)": 41.79, - "MassiveIntentClassification (ur)": 16.26, - "MassiveIntentClassification (kn)": 3.07, - "MassiveIntentClassification (ru)": 26.29, - "MassiveIntentClassification (de)": 44.12, - "MassiveIntentClassification (az)": 34.3, - "MassiveIntentClassification (hu)": 37.95, - "MassiveIntentClassification (ml)": 2.84, - "MassiveIntentClassification (tr)": 35.93, + "MassiveIntentClassification (en)": 67.15, + "MassiveIntentClassification (nb)": 41.91, + "MassiveIntentClassification (ur)": 16.18, + "MassiveIntentClassification (kn)": 3.08, + "MassiveIntentClassification (ru)": 26.33, + "MassiveIntentClassification (de)": 44.17, + "MassiveIntentClassification (az)": 34.25, + "MassiveIntentClassification (hu)": 38.0, + "MassiveIntentClassification (ml)": 2.85, + "MassiveIntentClassification (tr)": 35.9, "MassiveIntentClassification (ko)": 19.97, - "MassiveIntentClassification (mn)": 23.27, - "MassiveIntentClassification (ta)": 13.12, - "MassiveIntentClassification (fr)": 44.75, - "MassiveIntentClassification (el)": 28.68, - "MassiveIntentClassification (sw)": 35.26, - "MassiveIntentClassification (sv)": 40.33, - "MassiveIntentClassification (vi)": 37.35, - "MassiveIntentClassification (ms)": 36.16, - "MassiveIntentClassification (hi)": 18.0, + "MassiveIntentClassification (mn)": 23.25, + "MassiveIntentClassification (ta)": 13.1, + "MassiveIntentClassification (fr)": 44.82, + "MassiveIntentClassification (el)": 28.7, + "MassiveIntentClassification (sw)": 35.28, + "MassiveIntentClassification (sv)": 40.42, + "MassiveIntentClassification (vi)": 37.38, + "MassiveIntentClassification (ms)": 36.21, + "MassiveIntentClassification (hi)": 17.98, "MassiveIntentClassification (hy)": 8.69, - "MassiveIntentClassification (pl)": 37.59, - "MassiveIntentClassification (pt)": 45.08, - "MassiveIntentClassification (fi)": 39.19, - "MassiveIntentClassification (ar)": 21.02, - "MassiveIntentClassification (da)": 44.35, - "MassiveIntentClassification (af)": 38.84, - "MassiveIntentClassification (sq)": 41.47, + "MassiveIntentClassification (pl)": 37.63, + "MassiveIntentClassification (pt)": 45.12, + "MassiveIntentClassification (fi)": 39.27, + "MassiveIntentClassification (ar)": 20.94, + "MassiveIntentClassification (da)": 44.43, + "MassiveIntentClassification (af)": 38.94, + "MassiveIntentClassification (sq)": 41.62, "MassiveIntentClassification (am)": 2.45, - "MassiveIntentClassification (nl)": 41.77, - "MassiveIntentClassification (es)": 40.82, - "MassiveIntentClassification (it)": 43.16, - "MassiveIntentClassification (km)": 4.98, - "MassiveIntentClassification (id)": 39.65, - "MassiveIntentClassification (bn)": 13.7, - "MassiveIntentClassification (is)": 35.17, - "MassiveIntentClassification (jv)": 36.67, + "MassiveIntentClassification (nl)": 41.85, + "MassiveIntentClassification (es)": 40.91, + "MassiveIntentClassification (it)": 43.17, + "MassiveIntentClassification (km)": 4.99, + "MassiveIntentClassification (id)": 39.66, + "MassiveIntentClassification (bn)": 13.67, + "MassiveIntentClassification (is)": 35.14, + "MassiveIntentClassification (jv)": 36.69, "MassiveIntentClassification (ka)": 9.17, - "MassiveIntentClassification (ro)": 41.64, - "MassiveIntentClassification (sl)": 38.48, - "MassiveIntentClassification (zh-TW)": 22.38, - "MassiveIntentClassification (tl)": 38.63, - "MassiveIntentClassification (fa)": 23.56, - "MassiveIntentClassification (te)": 2.54, - "MassiveIntentClassification (lv)": 38.54, - "MassiveIntentClassification (my)": 4.36, - "MassiveIntentClassification (th)": 10.46, - "MassiveIntentClassification (he)": 23.71, - "MassiveIntentClassification (cy)": 35.65, - "MassiveIntentClassification": 22.39, + "MassiveIntentClassification (ro)": 41.71, + "MassiveIntentClassification (sl)": 38.52, + "MassiveIntentClassification (zh-TW)": 22.39, + "MassiveIntentClassification (tl)": 38.56, + "MassiveIntentClassification (fa)": 23.52, + "MassiveIntentClassification (te)": 2.56, + "MassiveIntentClassification (lv)": 38.61, + "MassiveIntentClassification (my)": 4.38, + "MassiveIntentClassification (th)": 10.54, + "MassiveIntentClassification (he)": 23.65, + "MassiveIntentClassification (cy)": 35.71, "MassiveScenarioClassification (pt)": 53.0, - "MassiveScenarioClassification (zh-TW)": 31.14, - "MassiveScenarioClassification (sv)": 46.81, + "MassiveScenarioClassification (zh-TW)": 31.16, + "MassiveScenarioClassification (sv)": 46.83, "MassiveScenarioClassification (fa)": 29.0, "MassiveScenarioClassification (lv)": 42.75, - "MassiveScenarioClassification (fr)": 53.77, + "MassiveScenarioClassification (fr)": 53.76, "MassiveScenarioClassification (ml)": 7.25, - "MassiveScenarioClassification (is)": 43.11, - "MassiveScenarioClassification (sq)": 49.12, + "MassiveScenarioClassification (is)": 43.08, + "MassiveScenarioClassification (sq)": 49.14, "MassiveScenarioClassification (am)": 7.41, - "MassiveScenarioClassification (cy)": 41.43, - "MassiveScenarioClassification (ro)": 49.94, + "MassiveScenarioClassification (cy)": 41.4, + "MassiveScenarioClassification (ro)": 49.97, "MassiveScenarioClassification (th)": 18.32, "MassiveScenarioClassification (kn)": 8.32, - "MassiveScenarioClassification (ur)": 24.45, - "MassiveScenarioClassification (id)": 44.37, - "MassiveScenarioClassification (el)": 35.55, - "MassiveScenarioClassification (he)": 25.73, - "MassiveScenarioClassification (my)": 10.06, - "MassiveScenarioClassification (de)": 52.08, - "MassiveScenarioClassification (hi)": 23.03, - "MassiveScenarioClassification (hu)": 44.1, - "MassiveScenarioClassification (tl)": 48.29, - "MassiveScenarioClassification (en)": 74.57, - "MassiveScenarioClassification (vi)": 40.97, + "MassiveScenarioClassification (ur)": 24.46, + "MassiveScenarioClassification (id)": 44.35, + "MassiveScenarioClassification (el)": 35.51, + "MassiveScenarioClassification (he)": 25.68, + "MassiveScenarioClassification (my)": 10.07, + "MassiveScenarioClassification (de)": 52.07, + "MassiveScenarioClassification (hi)": 23.02, + "MassiveScenarioClassification (hu)": 44.09, + "MassiveScenarioClassification (tl)": 48.31, + "MassiveScenarioClassification (en)": 74.58, + "MassiveScenarioClassification (vi)": 40.94, "MassiveScenarioClassification (bn)": 18.98, - "MassiveScenarioClassification (ka)": 14.85, - "MassiveScenarioClassification (hy)": 14.87, - "MassiveScenarioClassification (pl)": 44.74, + "MassiveScenarioClassification (ka)": 14.84, + "MassiveScenarioClassification (hy)": 14.83, + "MassiveScenarioClassification (pl)": 44.72, "MassiveScenarioClassification (sw)": 43.18, - "MassiveScenarioClassification (it)": 51.7, - "MassiveScenarioClassification (tr)": 41.8, - "MassiveScenarioClassification (nl)": 49.14, - "MassiveScenarioClassification (az)": 39.62, - "MassiveScenarioClassification (da)": 49.5, - "MassiveScenarioClassification (es)": 50.73, - "MassiveScenarioClassification (ar)": 27.66, - "MassiveScenarioClassification (sl)": 42.24, - "MassiveScenarioClassification (jv)": 44.54, - "MassiveScenarioClassification (ms)": 44.67, + "MassiveScenarioClassification (it)": 51.71, + "MassiveScenarioClassification (tr)": 41.79, + "MassiveScenarioClassification (nl)": 49.15, + "MassiveScenarioClassification (az)": 39.58, + "MassiveScenarioClassification (da)": 49.47, + "MassiveScenarioClassification (es)": 50.74, + "MassiveScenarioClassification (ar)": 27.62, + "MassiveScenarioClassification (sl)": 42.26, + "MassiveScenarioClassification (jv)": 44.57, + "MassiveScenarioClassification (ms)": 44.65, "MassiveScenarioClassification (fi)": 45.8, "MassiveScenarioClassification (km)": 9.75, - "MassiveScenarioClassification (ru)": 28.77, - "MassiveScenarioClassification (mn)": 29.01, - "MassiveScenarioClassification (ta)": 19.4, + "MassiveScenarioClassification (ru)": 28.75, + "MassiveScenarioClassification (mn)": 29.03, + "MassiveScenarioClassification (ta)": 19.38, "MassiveScenarioClassification (te)": 7.74, - "MassiveScenarioClassification (ja)": 36.77, + "MassiveScenarioClassification (ja)": 36.75, "MassiveScenarioClassification (ko)": 25.72, - "MassiveScenarioClassification (zh-CN)": 33.19, - "MassiveScenarioClassification (nb)": 47.35, - "MassiveScenarioClassification (af)": 45.72, - "MassiveScenarioClassification": 31.16, + "MassiveScenarioClassification (zh-CN)": 33.18, + "MassiveScenarioClassification (nb)": 47.36, + "MassiveScenarioClassification (af)": 45.71, "MultilingualSentiment": 40.52, "NoRecClassification": 37.73, "NordicLangClassification": 54.17, @@ -4085,7 +4358,6 @@ "MasakhaNEWSClusteringP2P (tir)": 44.08, "MasakhaNEWSClusteringP2P (xho)": 26.97, "MasakhaNEWSClusteringP2P (yor)": 32.51, - "MasakhaNEWSClusteringP2P": 42.72, "MasakhaNEWSClusteringS2S (amh)": 44.11, "MasakhaNEWSClusteringS2S (eng)": 40.71, "MasakhaNEWSClusteringS2S (fra)": 32.47, @@ -4102,7 +4374,6 @@ "MasakhaNEWSClusteringS2S (tir)": 43.63, "MasakhaNEWSClusteringS2S (xho)": 24.55, "MasakhaNEWSClusteringS2S (yor)": 32.85, - "MasakhaNEWSClusteringS2S": 32.47, "MedrxivClusteringP2P": 34.25, "MedrxivClusteringS2S": 32.24, "RedditClustering": 51.18, @@ -4309,7 +4580,7 @@ "XPQARetrieval (cmn-cmn)": 21.07, "XPQARetrieval (eng-cmn)": 6.58, "XPQARetrieval (cmn-eng)": 9.39, - "XPQARetrieval": 55.9 + "XPQARetrieval (fr)": 55.9 } ] }, @@ -4378,8 +4649,35 @@ }, { "Model": "all-MiniLM-L12-v2", - "STS17": 24.51, - "STS22": 44.39 + "STS17 (ar-ar)": 58.71, + "STS17 (en-ar)": 0.54, + "STS17 (en-de)": 27.54, + "STS17 (en-en)": 88.63, + "STS17 (en-tr)": 0.43, + "STS17 (es-en)": 22.01, + "STS17 (es-es)": 78.37, + "STS17 (fr-en)": 30.7, + "STS17 (it-en)": 24.28, + "STS17 (ko-ko)": 43.37, + "STS17 (nl-en)": 24.51, + "STS22 (ar)": 17.54, + "STS22 (de)": 22.53, + "STS22 (de-en)": 42.86, + "STS22 (de-fr)": 43.52, + "STS22 (de-pl)": 1.63, + "STS22 (en)": 65.67, + "STS22 (es)": 43.98, + "STS22 (es-en)": 53.99, + "STS22 (es-it)": 40.71, + "STS22 (fr)": 69.51, + "STS22 (fr-pl)": 16.9, + "STS22 (it)": 47.48, + "STS22 (pl)": 19.22, + "STS22 (pl-en)": 42.67, + "STS22 (ru)": 11.19, + "STS22 (tr)": 21.6, + "STS22 (zh)": 33.15, + "STS22 (zh-en)": 44.39 } ] }, @@ -4543,13 +4841,11 @@ { "Model": "all-MiniLM-L6-v2", "AllegroReviews": 24.64, - "AmazonCounterfactualClassification": 64.15, - "AmazonCounterfactualClassification (en-ext)": 65.59, "AmazonCounterfactualClassification (en)": 63.64, + "AmazonCounterfactualClassification (en-ext)": 65.59, "AmazonCounterfactualClassification (de)": 57.82, "AmazonCounterfactualClassification (ja)": 60.9, "AmazonPolarityClassification": 64.26, - "AmazonReviewsClassification": 31.79, "AmazonReviewsClassification (en)": 30.85, "AmazonReviewsClassification (de)": 26.44, "AmazonReviewsClassification (es)": 27.35, @@ -4570,24 +4866,21 @@ "JDReview": 59.98, "KinopoiskClassification": 33.93, "LccSentimentClassification": 38.53, - "MTOPDomainClassification": 91.56, "MTOPDomainClassification (en)": 91.68, "MTOPDomainClassification (de)": 70.47, "MTOPDomainClassification (es)": 72.99, "MTOPDomainClassification (fr)": 75.1, "MTOPDomainClassification (hi)": 40.74, "MTOPDomainClassification (th)": 15.66, - "MTOPIntentClassification": 62.18, "MTOPIntentClassification (en)": 61.55, "MTOPIntentClassification (de)": 45.7, "MTOPIntentClassification (es)": 44.19, "MTOPIntentClassification (fr)": 39.67, "MTOPIntentClassification (hi)": 18.69, "MTOPIntentClassification (th)": 5.03, - "MasakhaNEWSClassification": 74.05, + "MasakhaNEWSClassification (fra)": 68.84, "MasakhaNEWSClassification (amh)": 33.03, "MasakhaNEWSClassification (eng)": 77.11, - "MasakhaNEWSClassification (fra)": 68.84, "MasakhaNEWSClassification (hau)": 50.49, "MasakhaNEWSClassification (ibo)": 52.15, "MasakhaNEWSClassification (lin)": 68.29, @@ -4601,9 +4894,11 @@ "MasakhaNEWSClassification (tir)": 28.97, "MasakhaNEWSClassification (xho)": 54.14, "MasakhaNEWSClassification (yor)": 55.01, - "MassiveIntentClassification": 38.1, - "MassiveIntentClassification (mn)": 20.35, "MassiveIntentClassification (en)": 66.94, + "MassiveIntentClassification (da)": 41.0, + "MassiveIntentClassification (nb)": 39.36, + "MassiveIntentClassification (sv)": 38.09, + "MassiveIntentClassification (mn)": 20.35, "MassiveIntentClassification (ta)": 11.31, "MassiveIntentClassification (ru)": 27.58, "MassiveIntentClassification (fi)": 38.37, @@ -4611,7 +4906,6 @@ "MassiveIntentClassification (hi)": 17.7, "MassiveIntentClassification (fr)": 42.55, "MassiveIntentClassification (pt)": 43.76, - "MassiveIntentClassification (sv)": 38.09, "MassiveIntentClassification (ml)": 2.89, "MassiveIntentClassification (zh-TW)": 22.56, "MassiveIntentClassification (vi)": 37.09, @@ -4633,11 +4927,9 @@ "MassiveIntentClassification (de)": 43.44, "MassiveIntentClassification (th)": 11.26, "MassiveIntentClassification (cy)": 34.54, - "MassiveIntentClassification (da)": 41.0, "MassiveIntentClassification (ko)": 16.05, "MassiveIntentClassification (kn)": 3.14, "MassiveIntentClassification (my)": 4.24, - "MassiveIntentClassification (nb)": 39.36, "MassiveIntentClassification (zh-CN)": 24.4, "MassiveIntentClassification (ro)": 40.54, "MassiveIntentClassification (ja)": 31.87, @@ -4653,7 +4945,10 @@ "MassiveIntentClassification (sl)": 36.7, "MassiveIntentClassification (km)": 4.91, "MassiveIntentClassification (es)": 39.88, - "MassiveScenarioClassification": 42.93, + "MassiveScenarioClassification (en)": 73.81, + "MassiveScenarioClassification (da)": 47.02, + "MassiveScenarioClassification (nb)": 44.67, + "MassiveScenarioClassification (sv)": 42.95, "MassiveScenarioClassification (ka)": 14.92, "MassiveScenarioClassification (tr)": 38.85, "MassiveScenarioClassification (es)": 49.0, @@ -4667,7 +4962,6 @@ "MassiveScenarioClassification (nl)": 48.43, "MassiveScenarioClassification (ja)": 37.3, "MassiveScenarioClassification (ml)": 7.67, - "MassiveScenarioClassification (da)": 47.02, "MassiveScenarioClassification (kn)": 7.85, "MassiveScenarioClassification (my)": 10.61, "MassiveScenarioClassification (ms)": 43.67, @@ -4699,9 +4993,6 @@ "MassiveScenarioClassification (bn)": 20.56, "MassiveScenarioClassification (fi)": 42.38, "MassiveScenarioClassification (pt)": 50.72, - "MassiveScenarioClassification (en)": 73.81, - "MassiveScenarioClassification (sv)": 42.95, - "MassiveScenarioClassification (nb)": 44.67, "MassiveScenarioClassification (zh-CN)": 33.65, "MassiveScenarioClassification (ur)": 23.73, "MassiveScenarioClassification (de)": 51.47, @@ -4741,10 +5032,9 @@ "MLSUMClusteringP2P (ru)": 23.91, "MLSUMClusteringS2S": 28.12, "MLSUMClusteringS2S (ru)": 19.07, - "MasakhaNEWSClusteringP2P": 34.92, + "MasakhaNEWSClusteringP2P (fra)": 34.92, "MasakhaNEWSClusteringP2P (amh)": 43.85, "MasakhaNEWSClusteringP2P (eng)": 48.88, - "MasakhaNEWSClusteringP2P (fra)": 34.92, "MasakhaNEWSClusteringP2P (hau)": 24.77, "MasakhaNEWSClusteringP2P (ibo)": 45.94, "MasakhaNEWSClusteringP2P (lin)": 69.56, @@ -4758,10 +5048,9 @@ "MasakhaNEWSClusteringP2P (tir)": 42.93, "MasakhaNEWSClusteringP2P (xho)": 28.58, "MasakhaNEWSClusteringP2P (yor)": 31.45, - "MasakhaNEWSClusteringS2S": 40.58, + "MasakhaNEWSClusteringS2S (fra)": 40.58, "MasakhaNEWSClusteringS2S (amh)": 45.44, "MasakhaNEWSClusteringS2S (eng)": 41.09, - "MasakhaNEWSClusteringS2S (fra)": 40.58, "MasakhaNEWSClusteringS2S (hau)": 15.42, "MasakhaNEWSClusteringS2S (ibo)": 37.02, "MasakhaNEWSClusteringS2S (lin)": 65.14, @@ -4815,19 +5104,17 @@ { "Model": "all-MiniLM-L6-v2", "CDSC-E": 47.27, - "OpusparcusPC": 86.53, + "OpusparcusPC (fr)": 86.53, "OpusparcusPC (de)": 89.91, "OpusparcusPC (en)": 97.46, "OpusparcusPC (fi)": 85.44, - "OpusparcusPC (fr)": 86.53, "OpusparcusPC (ru)": 79.28, "OpusparcusPC (sv)": 83.78, "PSC": 81.87, - "PawsXPairClassification": 55.51, + "PawsXPairClassification (fr)": 55.52, "PawsXPairClassification (de)": 51.55, "PawsXPairClassification (en)": 59.1, "PawsXPairClassification (es)": 52.23, - "PawsXPairClassification (fr)": 55.52, "PawsXPairClassification (ja)": 49.19, "PawsXPairClassification (ko)": 50.59, "PawsXPairClassification (zh)": 53.18, @@ -4839,8 +5126,8 @@ }, { "Model": "all-MiniLM-L6-v2", - "OpusparcusPC": 86.53, - "PawsXPairClassification": 55.4, + "OpusparcusPC (fr)": 86.53, + "PawsXPairClassification (fr)": 55.4, "SprintDuplicateQuestions": 94.55, "TwitterSemEval2015": 67.86, "TwitterURLCorpus": 84.7 @@ -4919,11 +5206,10 @@ "MMarcoRetrieval": 6.21, "MSMARCO": 36.54, "MedicalRetrieval": 1.76, - "MintakaRetrieval": 9.19, + "MintakaRetrieval (fr)": 9.19, "MintakaRetrieval (ar)": 2.22, "MintakaRetrieval (de)": 15.43, "MintakaRetrieval (es)": 7.72, - "MintakaRetrieval (fr)": 9.19, "MintakaRetrieval (hi)": 2.65, "MintakaRetrieval (it)": 8.48, "MintakaRetrieval (ja)": 6.72, @@ -4958,7 +5244,7 @@ "Touche2020": 16.9, "VideoRetrieval": 9.79, "WinoGrande": 47.35, - "XPQARetrieval": 51.79, + "XPQARetrieval (fr)": 51.79, "XPQARetrieval (ara-ara)": 8.05, "XPQARetrieval (eng-ara)": 1.9, "XPQARetrieval (ara-eng)": 6.87, @@ -5071,10 +5357,37 @@ "STS14": 75.59, "STS15": 85.39, "STS16": 78.99, - "STS17": 29.0, - "STS22": 41.64, + "STS17 (ar-ar)": 50.89, + "STS17 (en-ar)": -4.28, + "STS17 (en-de)": 35.82, + "STS17 (en-en)": 87.59, + "STS17 (en-tr)": 4.5, + "STS17 (es-en)": 16.31, + "STS17 (es-es)": 76.12, + "STS17 (fr-en)": 37.09, + "STS17 (it-en)": 24.45, + "STS17 (ko-ko)": 43.39, + "STS17 (nl-en)": 29.0, + "STS22 (ar)": 22.64, + "STS22 (de)": 31.04, + "STS22 (de-en)": 44.04, + "STS22 (de-fr)": 30.07, + "STS22 (de-pl)": 4.93, + "STS22 (en)": 67.21, + "STS22 (es)": 54.78, + "STS22 (es-en)": 53.42, + "STS22 (es-it)": 44.27, + "STS22 (fr)": 77.0, + "STS22 (fr-pl)": 50.71, + "STS22 (it)": 60.4, + "STS22 (pl)": 26.77, + "STS22 (pl-en)": 32.8, + "STS22 (ru)": 14.72, + "STS22 (tr)": 33.69, + "STS22 (zh)": 44.93, + "STS22 (zh-en)": 41.64, "STSBenchmark": 82.03, - "STSBenchmarkMultilingualSTS": 64.93 + "STSBenchmarkMultilingualSTS (fr)": 64.93 } ] }, @@ -5346,7 +5659,7 @@ "MasakhaNEWSClassification (tir)": 27.46, "MasakhaNEWSClassification (xho)": 60.98, "MasakhaNEWSClassification (yor)": 63.33, - "MassiveIntentClassification": 69.57, + "MassiveIntentClassification (en)": 69.76, "MassiveIntentClassification (ar)": 20.42, "MassiveIntentClassification (is)": 31.46, "MassiveIntentClassification (ru)": 23.98, @@ -5366,7 +5679,6 @@ "MassiveIntentClassification (hy)": 10.11, "MassiveIntentClassification (nl)": 38.49, "MassiveIntentClassification (tr)": 32.02, - "MassiveIntentClassification (en)": 69.76, "MassiveIntentClassification (ur)": 12.86, "MassiveIntentClassification (cy)": 30.82, "MassiveIntentClassification (fr)": 44.27, @@ -5398,7 +5710,7 @@ "MassiveIntentClassification (ko)": 13.35, "MassiveIntentClassification (sl)": 34.49, "MassiveIntentClassification (km)": 4.76, - "MassiveScenarioClassification": 76.01, + "MassiveScenarioClassification (en)": 75.67, "MassiveScenarioClassification (ko)": 17.28, "MassiveScenarioClassification (sv)": 44.53, "MassiveScenarioClassification (hy)": 16.86, @@ -5424,7 +5736,6 @@ "MassiveScenarioClassification (el)": 33.85, "MassiveScenarioClassification (az)": 36.42, "MassiveScenarioClassification (he)": 25.49, - "MassiveScenarioClassification (en)": 75.67, "MassiveScenarioClassification (ka)": 13.45, "MassiveScenarioClassification (af)": 43.63, "MassiveScenarioClassification (bn)": 17.49, @@ -5807,8 +6118,8 @@ }, { "Model": "all-mpnet-base-v2", - "STS17": 90.6, - "STS22": 67.95 + "STS17 (en-en)": 90.6, + "STS22 (en)": 67.95 } ] }, @@ -5905,16 +6216,134 @@ "accuracy": [ { "Model": "allenai-specter", - "AmazonCounterfactualClassification": 43.87, + "AmazonCounterfactualClassification (de)": 54.46, + "AmazonCounterfactualClassification (en)": 58.7, + "AmazonCounterfactualClassification (en-ext)": 59.28, + "AmazonCounterfactualClassification (ja)": 43.87, "AmazonPolarityClassification": 57.77, - "AmazonReviewsClassification": 20.49, + "AmazonReviewsClassification (de)": 24.08, + "AmazonReviewsClassification (en)": 26.26, + "AmazonReviewsClassification (es)": 23.88, + "AmazonReviewsClassification (fr)": 23.31, + "AmazonReviewsClassification (ja)": 20.25, + "AmazonReviewsClassification (zh)": 20.49, "Banking77Classification": 66.66, "EmotionClassification": 24.82, "ImdbClassification": 56.35, - "MTOPDomainClassification": 14.98, - "MTOPIntentClassification": 4.67, - "MassiveIntentClassification": 4.79, - "MassiveScenarioClassification": 10.19, + "MTOPDomainClassification (de)": 48.55, + "MTOPDomainClassification (en)": 74.53, + "MTOPDomainClassification (es)": 58.39, + "MTOPDomainClassification (fr)": 54.61, + "MTOPDomainClassification (hi)": 21.22, + "MTOPDomainClassification (th)": 14.98, + "MTOPIntentClassification (de)": 35.55, + "MTOPIntentClassification (en)": 50.05, + "MTOPIntentClassification (es)": 36.72, + "MTOPIntentClassification (fr)": 34.71, + "MTOPIntentClassification (hi)": 4.44, + "MTOPIntentClassification (th)": 4.67, + "MassiveIntentClassification (af)": 33.68, + "MassiveIntentClassification (am)": 2.94, + "MassiveIntentClassification (ar)": 10.04, + "MassiveIntentClassification (az)": 30.74, + "MassiveIntentClassification (bn)": 3.02, + "MassiveIntentClassification (cy)": 33.94, + "MassiveIntentClassification (da)": 38.47, + "MassiveIntentClassification (de)": 36.06, + "MassiveIntentClassification (el)": 27.7, + "MassiveIntentClassification (en)": 51.73, + "MassiveIntentClassification (es)": 35.6, + "MassiveIntentClassification (fa)": 17.97, + "MassiveIntentClassification (fi)": 35.53, + "MassiveIntentClassification (fr)": 38.41, + "MassiveIntentClassification (he)": 2.69, + "MassiveIntentClassification (hi)": 3.43, + "MassiveIntentClassification (hu)": 34.05, + "MassiveIntentClassification (hy)": 3.11, + "MassiveIntentClassification (id)": 40.02, + "MassiveIntentClassification (is)": 32.63, + "MassiveIntentClassification (it)": 39.28, + "MassiveIntentClassification (ja)": 4.95, + "MassiveIntentClassification (jv)": 34.95, + "MassiveIntentClassification (ka)": 2.57, + "MassiveIntentClassification (km)": 4.73, + "MassiveIntentClassification (kn)": 3.54, + "MassiveIntentClassification (ko)": 2.68, + "MassiveIntentClassification (lv)": 37.91, + "MassiveIntentClassification (ml)": 2.88, + "MassiveIntentClassification (mn)": 16.94, + "MassiveIntentClassification (ms)": 36.6, + "MassiveIntentClassification (my)": 3.96, + "MassiveIntentClassification (nb)": 34.75, + "MassiveIntentClassification (nl)": 33.95, + "MassiveIntentClassification (pl)": 35.77, + "MassiveIntentClassification (pt)": 43.05, + "MassiveIntentClassification (ro)": 36.2, + "MassiveIntentClassification (ru)": 25.3, + "MassiveIntentClassification (sl)": 35.9, + "MassiveIntentClassification (sq)": 36.6, + "MassiveIntentClassification (sv)": 36.0, + "MassiveIntentClassification (sw)": 34.81, + "MassiveIntentClassification (ta)": 3.11, + "MassiveIntentClassification (te)": 2.53, + "MassiveIntentClassification (th)": 4.38, + "MassiveIntentClassification (tl)": 35.51, + "MassiveIntentClassification (tr)": 32.02, + "MassiveIntentClassification (ur)": 9.61, + "MassiveIntentClassification (vi)": 37.07, + "MassiveIntentClassification (zh-CN)": 2.81, + "MassiveIntentClassification (zh-TW)": 4.79, + "MassiveScenarioClassification (af)": 36.17, + "MassiveScenarioClassification (am)": 7.64, + "MassiveScenarioClassification (ar)": 15.26, + "MassiveScenarioClassification (az)": 30.73, + "MassiveScenarioClassification (bn)": 7.15, + "MassiveScenarioClassification (cy)": 34.73, + "MassiveScenarioClassification (da)": 39.93, + "MassiveScenarioClassification (de)": 38.62, + "MassiveScenarioClassification (el)": 27.18, + "MassiveScenarioClassification (en)": 58.58, + "MassiveScenarioClassification (es)": 39.44, + "MassiveScenarioClassification (fa)": 21.43, + "MassiveScenarioClassification (fi)": 33.21, + "MassiveScenarioClassification (fr)": 40.26, + "MassiveScenarioClassification (he)": 7.42, + "MassiveScenarioClassification (hi)": 8.06, + "MassiveScenarioClassification (hu)": 34.54, + "MassiveScenarioClassification (hy)": 8.61, + "MassiveScenarioClassification (id)": 40.04, + "MassiveScenarioClassification (is)": 33.57, + "MassiveScenarioClassification (it)": 40.1, + "MassiveScenarioClassification (ja)": 9.96, + "MassiveScenarioClassification (jv)": 36.11, + "MassiveScenarioClassification (ka)": 7.13, + "MassiveScenarioClassification (km)": 9.66, + "MassiveScenarioClassification (kn)": 7.55, + "MassiveScenarioClassification (ko)": 7.27, + "MassiveScenarioClassification (lv)": 37.03, + "MassiveScenarioClassification (ml)": 7.22, + "MassiveScenarioClassification (mn)": 21.53, + "MassiveScenarioClassification (ms)": 37.57, + "MassiveScenarioClassification (my)": 9.54, + "MassiveScenarioClassification (nb)": 35.71, + "MassiveScenarioClassification (nl)": 34.62, + "MassiveScenarioClassification (pl)": 36.87, + "MassiveScenarioClassification (pt)": 44.68, + "MassiveScenarioClassification (ro)": 37.29, + "MassiveScenarioClassification (ru)": 28.16, + "MassiveScenarioClassification (sl)": 37.95, + "MassiveScenarioClassification (sq)": 37.82, + "MassiveScenarioClassification (sv)": 35.35, + "MassiveScenarioClassification (sw)": 35.37, + "MassiveScenarioClassification (ta)": 7.19, + "MassiveScenarioClassification (te)": 7.29, + "MassiveScenarioClassification (th)": 9.47, + "MassiveScenarioClassification (tl)": 37.31, + "MassiveScenarioClassification (tr)": 34.57, + "MassiveScenarioClassification (ur)": 16.17, + "MassiveScenarioClassification (vi)": 35.91, + "MassiveScenarioClassification (zh-CN)": 9.19, + "MassiveScenarioClassification (zh-TW)": 10.19, "ToxicConversationsClassification": 57.44, "TweetSentimentExtractionClassification": 45.52 } @@ -5998,8 +6427,35 @@ "STS14": 54.87, "STS15": 62.54, "STS16": 64.27, - "STS17": 24.92, - "STS22": 8.44, + "STS17 (ar-ar)": 27.14, + "STS17 (en-ar)": 6.9, + "STS17 (en-de)": 11.59, + "STS17 (en-en)": 69.63, + "STS17 (en-tr)": 6.46, + "STS17 (es-en)": 10.86, + "STS17 (es-es)": 55.45, + "STS17 (fr-en)": 16.02, + "STS17 (it-en)": 19.87, + "STS17 (ko-ko)": 8.08, + "STS17 (nl-en)": 24.92, + "STS22 (ar)": 19.57, + "STS22 (de)": 17.31, + "STS22 (de-en)": 26.03, + "STS22 (de-fr)": 10.26, + "STS22 (de-pl)": 16.94, + "STS22 (en)": 55.06, + "STS22 (es)": 48.89, + "STS22 (es-en)": 51.79, + "STS22 (es-it)": 25.24, + "STS22 (fr)": 53.92, + "STS22 (fr-pl)": 39.44, + "STS22 (it)": 39.43, + "STS22 (pl)": 13.56, + "STS22 (pl-en)": 25.36, + "STS22 (ru)": 1.11, + "STS22 (tr)": 31.73, + "STS22 (zh)": 16.35, + "STS22 (zh-en)": 8.44, "STSBenchmark": 61.26 } ] @@ -6027,12 +6483,12 @@ "accuracy": [ { "Model": "bert-base-10lang-cased", - "AmazonReviewsClassification": 29.38, - "MTOPDomainClassification": 63.65, - "MTOPIntentClassification": 37.87, - "MasakhaNEWSClassification": 63.93, - "MassiveIntentClassification": 37.28, - "MassiveScenarioClassification": 44.5 + "AmazonReviewsClassification (fr)": 29.38, + "MTOPDomainClassification (fr)": 63.65, + "MTOPIntentClassification (fr)": 37.87, + "MasakhaNEWSClassification (fra)": 63.93, + "MassiveIntentClassification (fr)": 37.28, + "MassiveScenarioClassification (fr)": 44.5 } ] }, @@ -6045,8 +6501,8 @@ "HALClusteringS2S": 19.94, "MLSUMClusteringP2P": 40.96, "MLSUMClusteringS2S": 31.87, - "MasakhaNEWSClusteringP2P": 24.23, - "MasakhaNEWSClusteringS2S": 24.46 + "MasakhaNEWSClusteringP2P (fra)": 24.23, + "MasakhaNEWSClusteringS2S (fra)": 24.46 } ] }, @@ -6054,13 +6510,13 @@ "max_ap": [ { "Model": "bert-base-10lang-cased", - "OpusparcusPC": 86.79, - "PawsXPairClassification": 53.4 + "OpusparcusPC (fr)": 86.79, + "PawsXPairClassification (fr)": 53.4 }, { "Model": "bert-base-10lang-cased", - "OpusparcusPC": 87.78, - "PawsXPairClassification": 53.4 + "OpusparcusPC (fr)": 87.78, + "PawsXPairClassification (fr)": 53.4 } ] }, @@ -6079,9 +6535,9 @@ "Model": "bert-base-10lang-cased", "AlloprofRetrieval": 1.6, "BSARDRetrieval": 0.0, - "MintakaRetrieval": 3.55, + "MintakaRetrieval (fr)": 3.55, "SyntecRetrieval": 18.95, - "XPQARetrieval": 18.39 + "XPQARetrieval (fr)": 18.39 } ] }, @@ -6090,8 +6546,8 @@ { "Model": "bert-base-10lang-cased", "SICKFr": 58.76, - "STS22": 40.31, - "STSBenchmarkMultilingualSTS": 52.25 + "STS22 (fr)": 40.31, + "STSBenchmarkMultilingualSTS (fr)": 52.25 } ] }, @@ -6118,12 +6574,12 @@ "accuracy": [ { "Model": "bert-base-15lang-cased", - "AmazonReviewsClassification": 29.35, - "MTOPDomainClassification": 63.7, - "MTOPIntentClassification": 37.85, - "MasakhaNEWSClassification": 63.89, - "MassiveIntentClassification": 37.28, - "MassiveScenarioClassification": 44.47 + "AmazonReviewsClassification (fr)": 29.35, + "MTOPDomainClassification (fr)": 63.7, + "MTOPIntentClassification (fr)": 37.85, + "MasakhaNEWSClassification (fra)": 63.89, + "MassiveIntentClassification (fr)": 37.28, + "MassiveScenarioClassification (fr)": 44.47 } ] }, @@ -6136,8 +6592,8 @@ "HALClusteringS2S": 20.26, "MLSUMClusteringP2P": 41.22, "MLSUMClusteringS2S": 31.88, - "MasakhaNEWSClusteringP2P": 24.23, - "MasakhaNEWSClusteringS2S": 24.46 + "MasakhaNEWSClusteringP2P (fra)": 24.23, + "MasakhaNEWSClusteringS2S (fra)": 24.46 } ] }, @@ -6145,13 +6601,13 @@ "max_ap": [ { "Model": "bert-base-15lang-cased", - "OpusparcusPC": 86.78, - "PawsXPairClassification": 53.38 + "OpusparcusPC (fr)": 86.78, + "PawsXPairClassification (fr)": 53.38 }, { "Model": "bert-base-15lang-cased", - "OpusparcusPC": 87.73, - "PawsXPairClassification": 53.38 + "OpusparcusPC (fr)": 87.73, + "PawsXPairClassification (fr)": 53.38 } ] }, @@ -6170,9 +6626,9 @@ "Model": "bert-base-15lang-cased", "AlloprofRetrieval": 1.61, "BSARDRetrieval": 0.0, - "MintakaRetrieval": 3.55, + "MintakaRetrieval (fr)": 3.55, "SyntecRetrieval": 18.95, - "XPQARetrieval": 18.35 + "XPQARetrieval (fr)": 18.35 } ] }, @@ -6181,8 +6637,8 @@ { "Model": "bert-base-15lang-cased", "SICKFr": 58.77, - "STS22": 40.4, - "STSBenchmarkMultilingualSTS": 52.25 + "STS22 (fr)": 40.4, + "STSBenchmarkMultilingualSTS (fr)": 52.25 } ] }, @@ -6209,12 +6665,12 @@ "accuracy": [ { "Model": "bert-base-25lang-cased", - "AmazonReviewsClassification": 29.39, - "MTOPDomainClassification": 63.63, - "MTOPIntentClassification": 37.86, - "MasakhaNEWSClassification": 63.91, - "MassiveIntentClassification": 37.3, - "MassiveScenarioClassification": 44.47 + "AmazonReviewsClassification (fr)": 29.39, + "MTOPDomainClassification (fr)": 63.63, + "MTOPIntentClassification (fr)": 37.86, + "MasakhaNEWSClassification (fra)": 63.91, + "MassiveIntentClassification (fr)": 37.3, + "MassiveScenarioClassification (fr)": 44.47 } ] }, @@ -6227,8 +6683,8 @@ "HALClusteringS2S": 19.78, "MLSUMClusteringP2P": 40.73, "MLSUMClusteringS2S": 31.94, - "MasakhaNEWSClusteringP2P": 24.23, - "MasakhaNEWSClusteringS2S": 24.46 + "MasakhaNEWSClusteringP2P (fra)": 24.23, + "MasakhaNEWSClusteringS2S (fra)": 24.46 } ] }, @@ -6236,13 +6692,13 @@ "max_ap": [ { "Model": "bert-base-25lang-cased", - "OpusparcusPC": 86.79, - "PawsXPairClassification": 53.39 + "OpusparcusPC (fr)": 86.79, + "PawsXPairClassification (fr)": 53.39 }, { "Model": "bert-base-25lang-cased", - "OpusparcusPC": 87.78, - "PawsXPairClassification": 53.4 + "OpusparcusPC (fr)": 87.78, + "PawsXPairClassification (fr)": 53.4 } ] }, @@ -6261,9 +6717,9 @@ "Model": "bert-base-25lang-cased", "AlloprofRetrieval": 1.6, "BSARDRetrieval": 0.0, - "MintakaRetrieval": 3.55, + "MintakaRetrieval (fr)": 3.55, "SyntecRetrieval": 18.95, - "XPQARetrieval": 18.46 + "XPQARetrieval (fr)": 18.46 } ] }, @@ -6272,8 +6728,8 @@ { "Model": "bert-base-25lang-cased", "SICKFr": 58.76, - "STS22": 38.77, - "STSBenchmarkMultilingualSTS": 52.25 + "STS22 (fr)": 38.77, + "STSBenchmarkMultilingualSTS (fr)": 52.25 } ] }, @@ -6300,12 +6756,12 @@ "accuracy": [ { "Model": "bert-base-multilingual-cased", - "AmazonReviewsClassification": 29.39, - "MTOPDomainClassification": 63.61, - "MTOPIntentClassification": 37.84, - "MasakhaNEWSClassification": 64.0, - "MassiveIntentClassification": 37.3, - "MassiveScenarioClassification": 44.47 + "AmazonReviewsClassification (fr)": 29.39, + "MTOPDomainClassification (fr)": 63.61, + "MTOPIntentClassification (fr)": 37.84, + "MasakhaNEWSClassification (fra)": 64.0, + "MassiveIntentClassification (fr)": 37.3, + "MassiveScenarioClassification (fr)": 44.47 } ] }, @@ -6318,8 +6774,8 @@ "HALClusteringS2S": 20.81, "MLSUMClusteringP2P": 40.9, "MLSUMClusteringS2S": 31.8, - "MasakhaNEWSClusteringP2P": 24.23, - "MasakhaNEWSClusteringS2S": 24.46 + "MasakhaNEWSClusteringP2P (fra)": 24.23, + "MasakhaNEWSClusteringS2S (fra)": 24.46 } ] }, @@ -6327,13 +6783,13 @@ "max_ap": [ { "Model": "bert-base-multilingual-cased", - "OpusparcusPC": 86.77, - "PawsXPairClassification": 53.39 + "OpusparcusPC (fr)": 86.77, + "PawsXPairClassification (fr)": 53.39 }, { "Model": "bert-base-multilingual-cased", - "OpusparcusPC": 87.76, - "PawsXPairClassification": 53.41 + "OpusparcusPC (fr)": 87.76, + "PawsXPairClassification (fr)": 53.41 } ] }, @@ -6352,9 +6808,9 @@ "Model": "bert-base-multilingual-cased", "AlloprofRetrieval": 1.63, "BSARDRetrieval": 0.0, - "MintakaRetrieval": 3.55, + "MintakaRetrieval (fr)": 3.55, "SyntecRetrieval": 18.95, - "XPQARetrieval": 18.49 + "XPQARetrieval (fr)": 18.49 } ] }, @@ -6363,8 +6819,8 @@ { "Model": "bert-base-multilingual-cased", "SICKFr": 58.75, - "STS22": 39.05, - "STSBenchmarkMultilingualSTS": 52.25 + "STS22 (fr)": 39.05, + "STSBenchmarkMultilingualSTS (fr)": 52.25 } ] }, @@ -6391,12 +6847,12 @@ "accuracy": [ { "Model": "bert-base-multilingual-uncased", - "AmazonReviewsClassification": 29.02, - "MTOPDomainClassification": 64.49, - "MTOPIntentClassification": 39.4, - "MasakhaNEWSClassification": 75.69, - "MassiveIntentClassification": 38.01, - "MassiveScenarioClassification": 43.63 + "AmazonReviewsClassification (fr)": 29.02, + "MTOPDomainClassification (fr)": 64.49, + "MTOPIntentClassification (fr)": 39.4, + "MasakhaNEWSClassification (fra)": 75.69, + "MassiveIntentClassification (fr)": 38.01, + "MassiveScenarioClassification (fr)": 43.63 } ] }, @@ -6409,8 +6865,8 @@ "HALClusteringS2S": 20.9, "MLSUMClusteringP2P": 43.5, "MLSUMClusteringS2S": 30.99, - "MasakhaNEWSClusteringP2P": 49.71, - "MasakhaNEWSClusteringS2S": 42.23 + "MasakhaNEWSClusteringP2P (fra)": 49.71, + "MasakhaNEWSClusteringS2S (fra)": 42.23 } ] }, @@ -6418,13 +6874,13 @@ "max_ap": [ { "Model": "bert-base-multilingual-uncased", - "OpusparcusPC": 87.43, - "PawsXPairClassification": 53.22 + "OpusparcusPC (fr)": 87.43, + "PawsXPairClassification (fr)": 53.22 }, { "Model": "bert-base-multilingual-uncased", - "OpusparcusPC": 87.53, - "PawsXPairClassification": 53.33 + "OpusparcusPC (fr)": 87.53, + "PawsXPairClassification (fr)": 53.33 } ] }, @@ -6443,9 +6899,9 @@ "Model": "bert-base-multilingual-uncased", "AlloprofRetrieval": 5.51, "BSARDRetrieval": 0.0, - "MintakaRetrieval": 2.87, + "MintakaRetrieval (fr)": 2.87, "SyntecRetrieval": 34.95, - "XPQARetrieval": 26.12 + "XPQARetrieval (fr)": 26.12 } ] }, @@ -6454,8 +6910,8 @@ { "Model": "bert-base-multilingual-uncased", "SICKFr": 58.26, - "STS22": 56.47, - "STSBenchmarkMultilingualSTS": 54.97 + "STS22 (fr)": 56.47, + "STSBenchmarkMultilingualSTS (fr)": 54.97 } ] }, @@ -6491,8 +6947,12 @@ "DKHateClassification": 55.53, "DanishPoliticalCommentsClassification": 28.97, "LccSentimentClassification": 41.2, - "MassiveIntentClassification": 52.75, - "MassiveScenarioClassification": 56.09, + "MassiveIntentClassification (da)": 37.98, + "MassiveIntentClassification (nb)": 35.75, + "MassiveIntentClassification (sv)": 52.75, + "MassiveScenarioClassification (da)": 40.44, + "MassiveScenarioClassification (nb)": 35.76, + "MassiveScenarioClassification (sv)": 56.09, "NoRecClassification": 43.91, "NordicLangClassification": 62.45, "NorwegianParliament": 57.56, @@ -6534,16 +6994,16 @@ "accuracy": [ { "Model": "bert-base-uncased", - "AmazonCounterfactualClassification": 74.25, + "AmazonCounterfactualClassification (en)": 74.25, "AmazonPolarityClassification": 71.33, - "AmazonReviewsClassification": 33.56, + "AmazonReviewsClassification (en)": 33.56, "Banking77Classification": 63.41, "EmotionClassification": 35.28, "ImdbClassification": 65.35, - "MTOPDomainClassification": 82.63, - "MTOPIntentClassification": 68.14, - "MassiveIntentClassification": 59.88, - "MassiveScenarioClassification": 64.28, + "MTOPDomainClassification (en)": 82.63, + "MTOPIntentClassification (en)": 68.14, + "MassiveIntentClassification (en)": 59.88, + "MassiveScenarioClassification (en)": 64.28, "ToxicConversationsClassification": 70.0, "TweetSentimentExtractionClassification": 51.81 } @@ -6627,8 +7087,8 @@ "STS14": 47.73, "STS15": 60.29, "STS16": 63.73, - "STS17": 64.1, - "STS22": 56.37, + "STS17 (en-en)": 64.1, + "STS22 (en)": 56.37, "STSBenchmark": 47.29 } ] @@ -6771,11 +7231,11 @@ "accuracy": [ { "Model": "bge-base-zh-v1.5", - "AmazonReviewsClassification": 40.15, + "AmazonReviewsClassification (zh)": 40.15, "IFlyTek": 48.62, "JDReview": 83.62, - "MassiveIntentClassification": 67.93, - "MassiveScenarioClassification": 73.98, + "MassiveIntentClassification (zh-CN)": 67.93, + "MassiveScenarioClassification (zh-CN)": 73.98, "MultilingualSentiment": 70.67, "OnlineShopping": 91.26, "TNews": 51.08, @@ -6844,7 +7304,7 @@ "LCQMC": 74.45, "PAWSX": 20.4, "QBQTC": 36.22, - "STS22": 68.01, + "STS22 (zh)": 68.01, "STSB": 78.31 } ] @@ -7002,11 +7462,11 @@ "accuracy": [ { "Model": "bge-large-zh-noinstruct", - "AmazonReviewsClassification": 41.94, + "AmazonReviewsClassification (zh)": 41.94, "IFlyTek": 45.32, "JDReview": 85.38, - "MassiveIntentClassification": 66.96, - "MassiveScenarioClassification": 73.39, + "MassiveIntentClassification (zh-CN)": 66.96, + "MassiveScenarioClassification (zh-CN)": 73.39, "MultilingualSentiment": 73.7, "OnlineShopping": 91.66, "TNews": 52.05, @@ -7075,7 +7535,7 @@ "LCQMC": 74.71, "PAWSX": 16.64, "QBQTC": 35.2, - "STS22": 67.19, + "STS22 (zh)": 67.19, "STSB": 78.41 } ] @@ -7098,11 +7558,11 @@ "accuracy": [ { "Model": "bge-large-zh-v1.5", - "AmazonReviewsClassification": 41.38, + "AmazonReviewsClassification (zh)": 41.38, "IFlyTek": 48.74, "JDReview": 85.14, - "MassiveIntentClassification": 68.84, - "MassiveScenarioClassification": 74.7, + "MassiveIntentClassification (zh-CN)": 68.84, + "MassiveScenarioClassification (zh-CN)": 74.7, "MultilingualSentiment": 72.97, "OnlineShopping": 91.43, "TNews": 52.1, @@ -7171,7 +7631,7 @@ "LCQMC": 74.33, "PAWSX": 33.92, "QBQTC": 37.29, - "STS22": 68.94, + "STS22 (zh)": 68.94, "STSB": 78.7 } ] @@ -7464,11 +7924,11 @@ "accuracy": [ { "Model": "bge-small-zh-v1.5", - "AmazonReviewsClassification": 35.91, + "AmazonReviewsClassification (zh)": 35.91, "IFlyTek": 45.49, "JDReview": 80.04, - "MassiveIntentClassification": 63.95, - "MassiveScenarioClassification": 70.8, + "MassiveIntentClassification (zh-CN)": 63.95, + "MassiveScenarioClassification (zh-CN)": 70.8, "MultilingualSentiment": 63.06, "OnlineShopping": 85.05, "TNews": 48.15, @@ -7537,7 +7997,7 @@ "LCQMC": 72.19, "PAWSX": 9.26, "QBQTC": 35.29, - "STS22": 67.72, + "STS22 (zh)": 67.72, "STSB": 76.73 } ] @@ -7730,16 +8190,16 @@ "accuracy": [ { "Model": "contriever-base-msmarco", - "AmazonCounterfactualClassification": 72.19, + "AmazonCounterfactualClassification (en)": 72.19, "AmazonPolarityClassification": 68.63, - "AmazonReviewsClassification": 37.42, + "AmazonReviewsClassification (en)": 37.42, "Banking77Classification": 80.02, "EmotionClassification": 44.77, "ImdbClassification": 67.04, - "MTOPDomainClassification": 93.18, - "MTOPIntentClassification": 69.31, - "MassiveIntentClassification": 67.78, - "MassiveScenarioClassification": 76.0, + "MTOPDomainClassification (en)": 93.18, + "MTOPIntentClassification (en)": 69.31, + "MassiveIntentClassification (en)": 67.78, + "MassiveScenarioClassification (en)": 76.0, "ToxicConversationsClassification": 67.77, "TweetSentimentExtractionClassification": 56.1 } @@ -7823,8 +8283,8 @@ "STS14": 74.51, "STS15": 83.3, "STS16": 79.67, - "STS17": 86.32, - "STS22": 64.64, + "STS17 (en-en)": 86.32, + "STS22 (en)": 64.64, "STSBenchmark": 78.81 } ] @@ -8053,8 +8513,12 @@ "DKHateClassification": 60.09, "DanishPoliticalCommentsClassification": 36.6, "LccSentimentClassification": 57.33, - "MassiveIntentClassification": 49.74, - "MassiveScenarioClassification": 50.1, + "MassiveIntentClassification (da)": 60.55, + "MassiveIntentClassification (nb)": 52.49, + "MassiveIntentClassification (sv)": 49.74, + "MassiveScenarioClassification (da)": 64.16, + "MassiveScenarioClassification (nb)": 54.59, + "MassiveScenarioClassification (sv)": 50.1, "NoRecClassification": 48.3, "NordicLangClassification": 77.68, "NorwegianParliament": 58.78, @@ -8197,18 +8661,136 @@ { "Model": "distiluse-base-multilingual-cased-v2", "AllegroReviews": 28.03, - "AmazonCounterfactualClassification": 65.39, + "AmazonCounterfactualClassification (de)": 68.14, + "AmazonCounterfactualClassification (en)": 71.81, + "AmazonCounterfactualClassification (en-ext)": 72.96, + "AmazonCounterfactualClassification (ja)": 65.39, "AmazonPolarityClassification": 68.0, - "AmazonReviewsClassification": 33.89, + "AmazonReviewsClassification (de)": 35.03, + "AmazonReviewsClassification (en)": 35.45, + "AmazonReviewsClassification (es)": 36.24, + "AmazonReviewsClassification (fr)": 35.7, + "AmazonReviewsClassification (ja)": 31.08, + "AmazonReviewsClassification (zh)": 33.89, "Banking77Classification": 71.48, "CBD": 60.0, "EmotionClassification": 40.04, "ImdbClassification": 61.52, - "MTOPDomainClassification": 73.62, - "MTOPIntentClassification": 47.73, - "MasakhaNEWSClassification": 76.87, - "MassiveIntentClassification": 54.96, - "MassiveScenarioClassification": 62.89, + "MTOPDomainClassification (de)": 86.19, + "MTOPDomainClassification (en)": 91.59, + "MTOPDomainClassification (es)": 87.75, + "MTOPDomainClassification (fr)": 84.61, + "MTOPDomainClassification (hi)": 76.41, + "MTOPDomainClassification (th)": 73.62, + "MTOPIntentClassification (de)": 59.21, + "MTOPIntentClassification (en)": 66.4, + "MTOPIntentClassification (es)": 57.21, + "MTOPIntentClassification (fr)": 53.41, + "MTOPIntentClassification (hi)": 45.54, + "MTOPIntentClassification (th)": 47.73, + "MasakhaNEWSClassification (fra)": 76.87, + "MassiveIntentClassification (af)": 40.02, + "MassiveIntentClassification (am)": 2.35, + "MassiveIntentClassification (ar)": 43.14, + "MassiveIntentClassification (az)": 25.6, + "MassiveIntentClassification (bn)": 4.84, + "MassiveIntentClassification (cy)": 15.43, + "MassiveIntentClassification (da)": 52.33, + "MassiveIntentClassification (de)": 51.57, + "MassiveIntentClassification (el)": 49.65, + "MassiveIntentClassification (en)": 66.71, + "MassiveIntentClassification (es)": 56.57, + "MassiveIntentClassification (fa)": 55.36, + "MassiveIntentClassification (fi)": 45.72, + "MassiveIntentClassification (fr)": 57.02, + "MassiveIntentClassification (he)": 46.74, + "MassiveIntentClassification (hi)": 48.55, + "MassiveIntentClassification (hu)": 50.65, + "MassiveIntentClassification (hy)": 40.79, + "MassiveIntentClassification (id)": 56.0, + "MassiveIntentClassification (is)": 16.08, + "MassiveIntentClassification (it)": 57.65, + "MassiveIntentClassification (ja)": 55.33, + "MassiveIntentClassification (jv)": 28.16, + "MassiveIntentClassification (ka)": 29.41, + "MassiveIntentClassification (km)": 4.79, + "MassiveIntentClassification (kn)": 3.37, + "MassiveIntentClassification (ko)": 49.97, + "MassiveIntentClassification (lv)": 44.31, + "MassiveIntentClassification (ml)": 3.24, + "MassiveIntentClassification (mn)": 40.37, + "MassiveIntentClassification (ms)": 47.97, + "MassiveIntentClassification (my)": 38.48, + "MassiveIntentClassification (nb)": 46.01, + "MassiveIntentClassification (nl)": 58.29, + "MassiveIntentClassification (pl)": 53.1, + "MassiveIntentClassification (pt)": 58.63, + "MassiveIntentClassification (ro)": 50.63, + "MassiveIntentClassification (ru)": 57.96, + "MassiveIntentClassification (sl)": 50.66, + "MassiveIntentClassification (sq)": 50.25, + "MassiveIntentClassification (sv)": 52.41, + "MassiveIntentClassification (sw)": 19.29, + "MassiveIntentClassification (ta)": 3.79, + "MassiveIntentClassification (te)": 3.36, + "MassiveIntentClassification (th)": 45.28, + "MassiveIntentClassification (tl)": 28.44, + "MassiveIntentClassification (tr)": 50.47, + "MassiveIntentClassification (ur)": 46.03, + "MassiveIntentClassification (vi)": 45.25, + "MassiveIntentClassification (zh-CN)": 59.22, + "MassiveIntentClassification (zh-TW)": 54.96, + "MassiveScenarioClassification (af)": 53.67, + "MassiveScenarioClassification (am)": 7.72, + "MassiveScenarioClassification (ar)": 52.19, + "MassiveScenarioClassification (az)": 34.75, + "MassiveScenarioClassification (bn)": 10.65, + "MassiveScenarioClassification (cy)": 21.24, + "MassiveScenarioClassification (da)": 62.55, + "MassiveScenarioClassification (de)": 61.4, + "MassiveScenarioClassification (el)": 60.68, + "MassiveScenarioClassification (en)": 74.0, + "MassiveScenarioClassification (es)": 64.61, + "MassiveScenarioClassification (fa)": 59.24, + "MassiveScenarioClassification (fi)": 54.66, + "MassiveScenarioClassification (fr)": 65.2, + "MassiveScenarioClassification (he)": 54.74, + "MassiveScenarioClassification (hi)": 55.99, + "MassiveScenarioClassification (hu)": 61.2, + "MassiveScenarioClassification (hy)": 49.63, + "MassiveScenarioClassification (id)": 65.25, + "MassiveScenarioClassification (is)": 22.6, + "MassiveScenarioClassification (it)": 64.63, + "MassiveScenarioClassification (ja)": 62.32, + "MassiveScenarioClassification (jv)": 35.77, + "MassiveScenarioClassification (ka)": 39.08, + "MassiveScenarioClassification (km)": 9.24, + "MassiveScenarioClassification (kn)": 8.28, + "MassiveScenarioClassification (ko)": 57.6, + "MassiveScenarioClassification (lv)": 51.72, + "MassiveScenarioClassification (ml)": 8.25, + "MassiveScenarioClassification (mn)": 47.21, + "MassiveScenarioClassification (ms)": 55.65, + "MassiveScenarioClassification (my)": 43.31, + "MassiveScenarioClassification (nb)": 54.98, + "MassiveScenarioClassification (nl)": 67.49, + "MassiveScenarioClassification (pl)": 61.29, + "MassiveScenarioClassification (pt)": 64.26, + "MassiveScenarioClassification (ro)": 58.03, + "MassiveScenarioClassification (ru)": 65.41, + "MassiveScenarioClassification (sl)": 59.36, + "MassiveScenarioClassification (sq)": 62.69, + "MassiveScenarioClassification (sv)": 64.35, + "MassiveScenarioClassification (sw)": 25.12, + "MassiveScenarioClassification (ta)": 8.67, + "MassiveScenarioClassification (te)": 7.82, + "MassiveScenarioClassification (th)": 54.65, + "MassiveScenarioClassification (tl)": 36.09, + "MassiveScenarioClassification (tr)": 60.89, + "MassiveScenarioClassification (ur)": 54.71, + "MassiveScenarioClassification (vi)": 55.15, + "MassiveScenarioClassification (zh-CN)": 66.44, + "MassiveScenarioClassification (zh-TW)": 62.89, "PAC": 68.17, "PolEmo2.0-IN": 48.84, "PolEmo2.0-OUT": 30.0, @@ -8228,8 +8810,8 @@ "HALClusteringS2S": 18.2, "MLSUMClusteringP2P": 40.17, "MLSUMClusteringS2S": 34.65, - "MasakhaNEWSClusteringP2P": 53.76, - "MasakhaNEWSClusteringS2S": 32.76 + "MasakhaNEWSClusteringP2P (fra)": 53.76, + "MasakhaNEWSClusteringS2S (fra)": 32.76 } ] }, @@ -8238,10 +8820,10 @@ { "Model": "distiluse-base-multilingual-cased-v2", "CDSC-E": 71.83, - "OpusparcusPC": 92.07, + "OpusparcusPC (fr)": 92.07, "PPC": 86.83, "PSC": 96.35, - "PawsXPairClassification": 51.08, + "PawsXPairClassification (fr)": 51.08, "SICK-E-PL": 62.05, "SprintDuplicateQuestions": 87.15, "TwitterSemEval2015": 61.67, @@ -8250,10 +8832,10 @@ { "Model": "distiluse-base-multilingual-cased-v2", "CDSC-E": 72.0, - "OpusparcusPC": 92.07, + "OpusparcusPC (fr)": 92.07, "PPC": 86.83, "PSC": 96.35, - "PawsXPairClassification": 51.16, + "PawsXPairClassification (fr)": 51.16, "SICK-E-PL": 62.41, "SprintDuplicateQuestions": 88.43, "TwitterSemEval2015": 62.46, @@ -8285,7 +8867,7 @@ "FiQA-PL": 8.02, "HotpotQA-PL": 20.83, "MSMARCO-PL": 4.57, - "MintakaRetrieval": 22.55, + "MintakaRetrieval (fr)": 22.55, "NFCorpus-PL": 16.28, "NQ-PL": 5.85, "Quora-PL": 71.95, @@ -8293,7 +8875,7 @@ "SciFact-PL": 33.03, "SyntecRetrieval": 65.34, "TRECCOVID-PL": 16.91, - "XPQARetrieval": 51.2 + "XPQARetrieval (fr)": 51.2 } ] }, @@ -8311,10 +8893,37 @@ "STS14": 70.29, "STS15": 81.94, "STS16": 76.8, - "STS17": 80.51, - "STS22": 61.75, + "STS17 (ar-ar)": 77.34, + "STS17 (en-ar)": 77.46, + "STS17 (en-de)": 80.24, + "STS17 (en-en)": 86.19, + "STS17 (en-tr)": 74.34, + "STS17 (es-en)": 77.4, + "STS17 (es-es)": 83.71, + "STS17 (fr-en)": 79.28, + "STS17 (it-en)": 80.82, + "STS17 (ko-ko)": 76.4, + "STS17 (nl-en)": 80.51, + "STS22 (ar)": 49.04, + "STS22 (de)": 35.73, + "STS22 (de-en)": 47.51, + "STS22 (de-fr)": 60.76, + "STS22 (de-pl)": 36.09, + "STS22 (en)": 62.88, + "STS22 (es)": 59.34, + "STS22 (es-en)": 68.96, + "STS22 (es-it)": 63.28, + "STS22 (fr)": 76.41, + "STS22 (fr-pl)": 61.98, + "STS22 (it)": 65.1, + "STS22 (pl)": 34.58, + "STS22 (pl-en)": 71.33, + "STS22 (ru)": 52.4, + "STS22 (tr)": 54.07, + "STS22 (zh)": 54.32, + "STS22 (zh-en)": 61.75, "STSBenchmark": 80.75, - "STSBenchmarkMultilingualSTS": 77.49 + "STSBenchmarkMultilingualSTS (fr)": 77.49 } ] }, @@ -8452,8 +9061,12 @@ "DKHateClassification": 58.51, "DanishPoliticalCommentsClassification": 28.43, "LccSentimentClassification": 37.47, - "MassiveIntentClassification": 41.34, - "MassiveScenarioClassification": 50.0, + "MassiveIntentClassification (da)": 44.25, + "MassiveIntentClassification (nb)": 41.57, + "MassiveIntentClassification (sv)": 41.34, + "MassiveScenarioClassification (da)": 52.99, + "MassiveScenarioClassification (nb)": 50.33, + "MassiveScenarioClassification (sv)": 50.0, "NoRecClassification": 42.0, "NordicLangClassification": 59.34, "NorwegianParliament": 57.42, @@ -8604,8 +9217,12 @@ "DKHateClassification": 58.72, "DanishPoliticalCommentsClassification": 28.67, "LccSentimentClassification": 42.13, - "MassiveIntentClassification": 40.69, - "MassiveScenarioClassification": 50.97, + "MassiveIntentClassification (da)": 42.29, + "MassiveIntentClassification (nb)": 40.63, + "MassiveIntentClassification (sv)": 40.69, + "MassiveScenarioClassification (da)": 52.95, + "MassiveScenarioClassification (nb)": 51.91, + "MassiveScenarioClassification (sv)": 50.97, "NoRecClassification": 41.83, "NordicLangClassification": 58.3, "NorwegianParliament": 57.26, @@ -8825,10 +9442,9 @@ "AmazonReviewsClassification (en)": 55.97, "AmazonReviewsClassification (de)": 54.94, "AmazonReviewsClassification (es)": 51.62, - "AmazonReviewsClassification (fr)": 50.27, + "AmazonReviewsClassification (fr)": 36.71, "AmazonReviewsClassification (ja)": 51.32, "AmazonReviewsClassification (zh)": 47.6, - "AmazonReviewsClassification": 36.71, "AngryTweetsClassification": 65.01, "Banking77Classification": 81.41, "CBD": 72.59, @@ -8845,20 +9461,18 @@ "MTOPDomainClassification (en)": 95.33, "MTOPDomainClassification (de)": 90.48, "MTOPDomainClassification (es)": 90.22, - "MTOPDomainClassification (fr)": 88.49, + "MTOPDomainClassification (fr)": 74.8, "MTOPDomainClassification (hi)": 86.67, "MTOPDomainClassification (th)": 83.42, - "MTOPDomainClassification": 74.8, "MTOPIntentClassification (en)": 78.99, "MTOPIntentClassification (de)": 69.37, "MTOPIntentClassification (es)": 73.45, - "MTOPIntentClassification (fr)": 70.6, + "MTOPIntentClassification (fr)": 53.97, "MTOPIntentClassification (hi)": 59.78, "MTOPIntentClassification (th)": 62.24, - "MTOPIntentClassification": 53.97, "MasakhaNEWSClassification (amh)": 53.11, "MasakhaNEWSClassification (eng)": 85.89, - "MasakhaNEWSClassification (fra)": 82.94, + "MasakhaNEWSClassification (fra)": 80.59, "MasakhaNEWSClassification (hau)": 76.73, "MasakhaNEWSClassification (ibo)": 77.49, "MasakhaNEWSClassification (lin)": 83.94, @@ -8872,7 +9486,6 @@ "MasakhaNEWSClassification (tir)": 33.05, "MasakhaNEWSClassification (xho)": 87.71, "MasakhaNEWSClassification (yor)": 85.55, - "MasakhaNEWSClassification": 80.59, "MassiveIntentClassification (ro)": 63.81, "MassiveIntentClassification (bn)": 58.21, "MassiveIntentClassification (ja)": 71.72, @@ -8895,7 +9508,7 @@ "MassiveIntentClassification (es)": 70.88, "MassiveIntentClassification (ur)": 57.26, "MassiveIntentClassification (ar)": 57.67, - "MassiveIntentClassification (fr)": 71.66, + "MassiveIntentClassification (fr)": 46.39, "MassiveIntentClassification (nb)": 65.35, "MassiveIntentClassification (de)": 70.52, "MassiveIntentClassification (az)": 58.14, @@ -8924,7 +9537,6 @@ "MassiveIntentClassification (da)": 68.11, "MassiveIntentClassification (en)": 77.0, "MassiveIntentClassification (kn)": 45.41, - "MassiveIntentClassification": 46.39, "MassiveScenarioClassification (id)": 73.78, "MassiveScenarioClassification (km)": 48.42, "MassiveScenarioClassification (pt)": 74.21, @@ -8959,7 +9571,7 @@ "MassiveScenarioClassification (fa)": 74.61, "MassiveScenarioClassification (da)": 73.77, "MassiveScenarioClassification (kn)": 55.85, - "MassiveScenarioClassification (fr)": 75.65, + "MassiveScenarioClassification (fr)": 53.86, "MassiveScenarioClassification (ms)": 72.85, "MassiveScenarioClassification (ja)": 75.79, "MassiveScenarioClassification (hi)": 69.65, @@ -8976,7 +9588,6 @@ "MassiveScenarioClassification (it)": 74.88, "MassiveScenarioClassification (sw)": 62.31, "MassiveScenarioClassification (vi)": 71.92, - "MassiveScenarioClassification": 53.86, "MultilingualSentiment": 73.47, "NoRecClassification": 55.0, "NordicLangClassification": 69.13, @@ -9006,7 +9617,7 @@ "MLSUMClusteringS2S": 32.0, "MasakhaNEWSClusteringP2P (amh)": 47.57, "MasakhaNEWSClusteringP2P (eng)": 71.97, - "MasakhaNEWSClusteringP2P (fra)": 81.18, + "MasakhaNEWSClusteringP2P (fra)": 52.47, "MasakhaNEWSClusteringP2P (hau)": 72.23, "MasakhaNEWSClusteringP2P (ibo)": 66.65, "MasakhaNEWSClusteringP2P (lin)": 75.85, @@ -9020,10 +9631,9 @@ "MasakhaNEWSClusteringP2P (tir)": 46.94, "MasakhaNEWSClusteringP2P (xho)": 59.17, "MasakhaNEWSClusteringP2P (yor)": 65.86, - "MasakhaNEWSClusteringP2P": 52.47, "MasakhaNEWSClusteringS2S (amh)": 47.55, "MasakhaNEWSClusteringS2S (eng)": 74.23, - "MasakhaNEWSClusteringS2S (fra)": 74.9, + "MasakhaNEWSClusteringS2S (fra)": 49.2, "MasakhaNEWSClusteringS2S (hau)": 40.78, "MasakhaNEWSClusteringS2S (ibo)": 59.84, "MasakhaNEWSClusteringS2S (lin)": 80.96, @@ -9037,7 +9647,6 @@ "MasakhaNEWSClusteringS2S (tir)": 43.92, "MasakhaNEWSClusteringS2S (xho)": 26.61, "MasakhaNEWSClusteringS2S (yor)": 63.18, - "MasakhaNEWSClusteringS2S": 49.2, "RuSciBenchGRNTIClusteringP2P": 62.27, "RuSciBenchOECDClusteringP2P": 54.13 } @@ -9074,19 +9683,17 @@ "OpusparcusPC (de)": 97.63, "OpusparcusPC (en)": 99.1, "OpusparcusPC (fi)": 92.76, - "OpusparcusPC (fr)": 95.23, + "OpusparcusPC (fr)": 89.71, "OpusparcusPC (ru)": 91.44, "OpusparcusPC (sv)": 95.54, - "OpusparcusPC": 89.71, "PSC": 99.53, "PawsXPairClassification (de)": 58.67, "PawsXPairClassification (en)": 67.34, "PawsXPairClassification (es)": 60.24, - "PawsXPairClassification (fr)": 62.03, + "PawsXPairClassification (fr)": 64.33, "PawsXPairClassification (ja)": 52.15, "PawsXPairClassification (ko)": 53.14, "PawsXPairClassification (zh)": 59.4, - "PawsXPairClassification": 64.33, "SICK-E-PL": 79.94, "SprintDuplicateQuestions": 95.86, "TERRa": 60.81, @@ -9095,8 +9702,8 @@ }, { "Model": "e5-mistral-7b-instruct", - "OpusparcusPC": 88.5, - "PawsXPairClassification": 63.65 + "OpusparcusPC (fr)": 88.5, + "PawsXPairClassification (fr)": 63.65 } ] }, @@ -9185,12 +9792,11 @@ "MintakaRetrieval (ar)": 24.33, "MintakaRetrieval (de)": 46.42, "MintakaRetrieval (es)": 44.42, - "MintakaRetrieval (fr)": 46.07, + "MintakaRetrieval (fr)": 3.57, "MintakaRetrieval (hi)": 25.5, "MintakaRetrieval (it)": 43.36, "MintakaRetrieval (ja)": 33.72, "MintakaRetrieval (pt)": 47.49, - "MintakaRetrieval": 3.57, "NFCorpus": 38.58, "NFCorpus-PL": 30.97, "PIQA": 39.83, @@ -9255,7 +9861,7 @@ "XPQARetrieval (cmn-cmn)": 63.0, "XPQARetrieval (eng-cmn)": 34.01, "XPQARetrieval (cmn-eng)": 56.43, - "XPQARetrieval": 41.29 + "XPQARetrieval (fr)": 41.29 } ], "recall_at_1": [ @@ -9365,8 +9971,8 @@ { "Model": "e5-mistral-7b-instruct", "SICKFr": 64.39, - "STS22": 69.82, - "STSBenchmarkMultilingualSTS": 61.87 + "STS22 (fr)": 69.82, + "STSBenchmarkMultilingualSTS (fr)": 61.87 } ] }, @@ -9476,8 +10082,12 @@ "DKHateClassification": 57.57, "DanishPoliticalCommentsClassification": 28.37, "LccSentimentClassification": 40.27, - "MassiveIntentClassification": 40.07, - "MassiveScenarioClassification": 47.06, + "MassiveIntentClassification (da)": 41.89, + "MassiveIntentClassification (nb)": 40.25, + "MassiveIntentClassification (sv)": 40.07, + "MassiveScenarioClassification (da)": 49.93, + "MassiveScenarioClassification (nb)": 48.58, + "MassiveScenarioClassification (sv)": 47.06, "NoRecClassification": 41.84, "NordicLangClassification": 53.47, "NorwegianParliament": 56.57, @@ -9541,8 +10151,12 @@ "DKHateClassification": 59.45, "DanishPoliticalCommentsClassification": 31.89, "LccSentimentClassification": 47.93, - "MassiveIntentClassification": 27.58, - "MassiveScenarioClassification": 29.93, + "MassiveIntentClassification (da)": 26.3, + "MassiveIntentClassification (nb)": 24.6, + "MassiveIntentClassification (sv)": 27.58, + "MassiveScenarioClassification (da)": 28.93, + "MassiveScenarioClassification (nb)": 27.3, + "MassiveScenarioClassification (sv)": 29.93, "NoRecClassification": 45.44, "NordicLangClassification": 57.82, "NorwegianParliament": 53.25, @@ -9593,8 +10207,12 @@ "DKHateClassification": 52.28, "DanishPoliticalCommentsClassification": 25.17, "LccSentimentClassification": 36.67, - "MassiveIntentClassification": 6.6, - "MassiveScenarioClassification": 12.16, + "MassiveIntentClassification (da)": 6.51, + "MassiveIntentClassification (nb)": 5.66, + "MassiveIntentClassification (sv)": 6.6, + "MassiveScenarioClassification (da)": 11.5, + "MassiveScenarioClassification (nb)": 11.26, + "MassiveScenarioClassification (sv)": 12.16, "NoRecClassification": 39.72, "NordicLangClassification": 44.53, "NorwegianParliament": 52.44, @@ -9636,16 +10254,16 @@ "accuracy": [ { "Model": "elser-v2", - "AmazonCounterfactualClassification": 74.16, + "AmazonCounterfactualClassification (en)": 74.16, "AmazonPolarityClassification": 61.91, - "AmazonReviewsClassification": 32.06, + "AmazonReviewsClassification (en)": 32.06, "Banking77Classification": 82.05, "EmotionClassification": 46.65, "ImdbClassification": 65.02, - "MTOPDomainClassification": 93.17, - "MTOPIntentClassification": 71.1, - "MassiveIntentClassification": 68.48, - "MassiveScenarioClassification": 74.98, + "MTOPDomainClassification (en)": 93.17, + "MTOPIntentClassification (en)": 71.1, + "MassiveIntentClassification (en)": 68.48, + "MassiveScenarioClassification (en)": 74.98, "ToxicConversationsClassification": 68.15, "TweetSentimentExtractionClassification": 53.57 } @@ -9729,8 +10347,8 @@ "STS14": 74.96, "STS15": 83.7, "STS16": 80.55, - "STS17": 85.74, - "STS22": 67.5, + "STS17 (en-en)": 85.74, + "STS22 (en)": 67.5, "STSBenchmark": 79.54 } ] @@ -9836,12 +10454,12 @@ "accuracy": [ { "Model": "flaubert_base_cased", - "AmazonReviewsClassification": 24.9, - "MTOPDomainClassification": 25.55, - "MTOPIntentClassification": 9.49, - "MasakhaNEWSClassification": 71.14, - "MassiveIntentClassification": 6.98, - "MassiveScenarioClassification": 11.41 + "AmazonReviewsClassification (fr)": 24.9, + "MTOPDomainClassification (fr)": 25.55, + "MTOPIntentClassification (fr)": 9.49, + "MasakhaNEWSClassification (fra)": 71.14, + "MassiveIntentClassification (fr)": 6.98, + "MassiveScenarioClassification (fr)": 11.41 } ] }, @@ -9854,8 +10472,8 @@ "HALClusteringS2S": 3.85, "MLSUMClusteringP2P": 39.06, "MLSUMClusteringS2S": 17.13, - "MasakhaNEWSClusteringP2P": 41.61, - "MasakhaNEWSClusteringS2S": 21.26 + "MasakhaNEWSClusteringP2P (fra)": 41.61, + "MasakhaNEWSClusteringS2S (fra)": 21.26 } ] }, @@ -9863,13 +10481,13 @@ "max_ap": [ { "Model": "flaubert_base_cased", - "OpusparcusPC": 82.15, - "PawsXPairClassification": 51.89 + "OpusparcusPC (fr)": 82.15, + "PawsXPairClassification (fr)": 51.89 }, { "Model": "flaubert_base_cased", - "OpusparcusPC": 82.15, - "PawsXPairClassification": 52.19 + "OpusparcusPC (fr)": 82.15, + "PawsXPairClassification (fr)": 52.19 } ] }, @@ -9888,9 +10506,9 @@ "Model": "flaubert_base_cased", "AlloprofRetrieval": 1.63, "BSARDRetrieval": 0.0, - "MintakaRetrieval": 0.58, + "MintakaRetrieval (fr)": 0.58, "SyntecRetrieval": 20.56, - "XPQARetrieval": 6.59 + "XPQARetrieval (fr)": 6.59 } ] }, @@ -9899,8 +10517,8 @@ { "Model": "flaubert_base_cased", "SICKFr": 53.86, - "STS22": 65.37, - "STSBenchmarkMultilingualSTS": 37.14 + "STS22 (fr)": 65.37, + "STSBenchmarkMultilingualSTS (fr)": 37.14 } ] }, @@ -9927,12 +10545,12 @@ "accuracy": [ { "Model": "flaubert_base_uncased", - "AmazonReviewsClassification": 23.52, - "MTOPDomainClassification": 27.74, - "MTOPIntentClassification": 8.61, - "MasakhaNEWSClassification": 62.61, - "MassiveIntentClassification": 6.24, - "MassiveScenarioClassification": 10.98 + "AmazonReviewsClassification (fr)": 23.52, + "MTOPDomainClassification (fr)": 27.74, + "MTOPIntentClassification (fr)": 8.61, + "MasakhaNEWSClassification (fra)": 62.61, + "MassiveIntentClassification (fr)": 6.24, + "MassiveScenarioClassification (fr)": 10.98 } ] }, @@ -9945,8 +10563,8 @@ "HALClusteringS2S": 1.8, "MLSUMClusteringP2P": 33.22, "MLSUMClusteringS2S": 14.9, - "MasakhaNEWSClusteringP2P": 28.49, - "MasakhaNEWSClusteringS2S": 22.58 + "MasakhaNEWSClusteringP2P (fra)": 28.49, + "MasakhaNEWSClusteringS2S (fra)": 22.58 } ] }, @@ -9954,13 +10572,13 @@ "max_ap": [ { "Model": "flaubert_base_uncased", - "OpusparcusPC": 82.0, - "PawsXPairClassification": 52.78 + "OpusparcusPC (fr)": 82.0, + "PawsXPairClassification (fr)": 52.78 }, { "Model": "flaubert_base_uncased", - "OpusparcusPC": 82.0, - "PawsXPairClassification": 52.89 + "OpusparcusPC (fr)": 82.0, + "PawsXPairClassification (fr)": 52.89 } ] }, @@ -9979,9 +10597,9 @@ "Model": "flaubert_base_uncased", "AlloprofRetrieval": 1.72, "BSARDRetrieval": 0.0, - "MintakaRetrieval": 0.51, + "MintakaRetrieval (fr)": 0.51, "SyntecRetrieval": 22.33, - "XPQARetrieval": 9.09 + "XPQARetrieval (fr)": 9.09 } ] }, @@ -9990,8 +10608,8 @@ { "Model": "flaubert_base_uncased", "SICKFr": 41.9, - "STS22": 55.15, - "STSBenchmarkMultilingualSTS": 33.41 + "STS22 (fr)": 55.15, + "STSBenchmarkMultilingualSTS (fr)": 33.41 } ] }, @@ -10018,12 +10636,12 @@ "accuracy": [ { "Model": "flaubert_large_cased", - "AmazonReviewsClassification": 22.45, - "MTOPDomainClassification": 24.27, - "MTOPIntentClassification": 9.79, - "MasakhaNEWSClassification": 55.64, - "MassiveIntentClassification": 16.41, - "MassiveScenarioClassification": 22.72 + "AmazonReviewsClassification (fr)": 22.45, + "MTOPDomainClassification (fr)": 24.27, + "MTOPIntentClassification (fr)": 9.79, + "MasakhaNEWSClassification (fra)": 55.64, + "MassiveIntentClassification (fr)": 16.41, + "MassiveScenarioClassification (fr)": 22.72 } ] }, @@ -10036,8 +10654,8 @@ "HALClusteringS2S": 5.26, "MLSUMClusteringP2P": 38.09, "MLSUMClusteringS2S": 18.71, - "MasakhaNEWSClusteringP2P": 26.43, - "MasakhaNEWSClusteringS2S": 24.68 + "MasakhaNEWSClusteringP2P (fra)": 26.43, + "MasakhaNEWSClusteringS2S (fra)": 24.68 } ] }, @@ -10045,13 +10663,13 @@ "max_ap": [ { "Model": "flaubert_large_cased", - "OpusparcusPC": 74.78, - "PawsXPairClassification": 54.14 + "OpusparcusPC (fr)": 74.78, + "PawsXPairClassification (fr)": 54.14 }, { "Model": "flaubert_large_cased", - "OpusparcusPC": 85.91, - "PawsXPairClassification": 54.94 + "OpusparcusPC (fr)": 85.91, + "PawsXPairClassification (fr)": 54.94 } ] }, @@ -10070,9 +10688,9 @@ "Model": "flaubert_large_cased", "AlloprofRetrieval": 0.58, "BSARDRetrieval": 0.0, - "MintakaRetrieval": 0.26, + "MintakaRetrieval (fr)": 0.26, "SyntecRetrieval": 1.58, - "XPQARetrieval": 3.69 + "XPQARetrieval (fr)": 3.69 } ] }, @@ -10081,8 +10699,8 @@ { "Model": "flaubert_large_cased", "SICKFr": 34.6, - "STS22": 48.52, - "STSBenchmarkMultilingualSTS": 15.66 + "STS22 (fr)": 48.52, + "STSBenchmarkMultilingualSTS (fr)": 15.66 } ] }, @@ -10266,7 +10884,10 @@ "f1": [ { "Model": "glove.6B.300d", - "BUCC": 0.0 + "BUCC (de-en)": 0.18, + "BUCC (fr-en)": 0.19, + "BUCC (ru-en)": 0.1, + "BUCC (zh-en)": 0.0 } ] }, @@ -10274,16 +10895,16 @@ "accuracy": [ { "Model": "glove.6B.300d", - "AmazonCounterfactualClassification": 56.91, + "AmazonCounterfactualClassification (en)": 56.91, "AmazonPolarityClassification": 60.32, - "AmazonReviewsClassification": 29.67, + "AmazonReviewsClassification (en)": 29.67, "Banking77Classification": 67.69, "EmotionClassification": 36.93, "ImdbClassification": 62.57, - "MTOPDomainClassification": 79.11, - "MTOPIntentClassification": 55.85, - "MassiveIntentClassification": 56.19, - "MassiveScenarioClassification": 66.03, + "MTOPDomainClassification (en)": 79.11, + "MTOPIntentClassification (en)": 55.85, + "MassiveIntentClassification (en)": 56.19, + "MassiveScenarioClassification (en)": 66.03, "ToxicConversationsClassification": 65.4, "TweetSentimentExtractionClassification": 50.8 } @@ -10367,8 +10988,8 @@ "STS14": 60.81, "STS15": 72.31, "STS16": 65.34, - "STS17": 77.95, - "STS22": 56.35, + "STS17 (en-en)": 77.95, + "STS22 (en)": 56.35, "STSBenchmark": 61.54 } ] @@ -10396,16 +11017,16 @@ "accuracy": [ { "Model": "google-gecko-256.text-embedding-preview-0409", - "AmazonCounterfactualClassification": 70.93, + "AmazonCounterfactualClassification (en)": 70.93, "AmazonPolarityClassification": 97.34, - "AmazonReviewsClassification": 48.47, + "AmazonReviewsClassification (en)": 48.47, "Banking77Classification": 86.01, "EmotionClassification": 51.53, "ImdbClassification": 95.7, - "MTOPDomainClassification": 98.02, - "MTOPIntentClassification": 77.82, - "MassiveIntentClassification": 75.67, - "MassiveScenarioClassification": 85.16, + "MTOPDomainClassification (en)": 98.02, + "MTOPIntentClassification (en)": 77.82, + "MassiveIntentClassification (en)": 75.67, + "MassiveScenarioClassification (en)": 85.16, "ToxicConversationsClassification": 88.33, "TweetSentimentExtractionClassification": 72.97 } @@ -10489,8 +11110,8 @@ "STS14": 85.44, "STS15": 89.64, "STS16": 87.24, - "STS17": 90.46, - "STS22": 67.99, + "STS17 (en-en)": 90.46, + "STS22 (en)": 67.99, "STSBenchmark": 89.33 } ] @@ -10518,16 +11139,16 @@ "accuracy": [ { "Model": "google-gecko.text-embedding-preview-0409", - "AmazonCounterfactualClassification": 75.34, + "AmazonCounterfactualClassification (en)": 75.34, "AmazonPolarityClassification": 97.34, - "AmazonReviewsClassification": 51.17, + "AmazonReviewsClassification (en)": 51.17, "Banking77Classification": 88.62, "EmotionClassification": 52.51, "ImdbClassification": 95.65, - "MTOPDomainClassification": 98.35, - "MTOPIntentClassification": 83.43, - "MassiveIntentClassification": 80.22, - "MassiveScenarioClassification": 87.19, + "MTOPDomainClassification (en)": 98.35, + "MTOPIntentClassification (en)": 83.43, + "MassiveIntentClassification (en)": 80.22, + "MassiveScenarioClassification (en)": 87.19, "ToxicConversationsClassification": 89.67, "TweetSentimentExtractionClassification": 74.52 } @@ -10636,8 +11257,8 @@ "STS14": 85.25, "STS15": 89.66, "STS16": 87.34, - "STS17": 92.06, - "STS22": 68.02, + "STS17 (en-en)": 92.06, + "STS22 (en)": 68.02, "STSBenchmark": 88.99 } ] @@ -10712,18 +11333,21 @@ "accuracy": [ { "Model": "gte-Qwen1.5-7B-instruct", - "AmazonCounterfactualClassification": 83.16, + "AmazonCounterfactualClassification (en)": 83.16, "AmazonPolarityClassification": 96.7, - "AmazonReviewsClassification": 52.95, + "AmazonReviewsClassification (en)": 62.17, + "AmazonReviewsClassification (zh)": 52.95, "Banking77Classification": 81.68, "EmotionClassification": 54.53, "IFlyTek": 53.77, "ImdbClassification": 95.58, "JDReview": 88.2, - "MTOPDomainClassification": 95.75, - "MTOPIntentClassification": 84.26, - "MassiveIntentClassification": 78.47, - "MassiveScenarioClassification": 77.26, + "MTOPDomainClassification (en)": 95.75, + "MTOPIntentClassification (en)": 84.26, + "MassiveIntentClassification (zh-CN)": 76.25, + "MassiveIntentClassification (en)": 78.47, + "MassiveScenarioClassification (en)": 78.19, + "MassiveScenarioClassification (zh-CN)": 77.26, "MultilingualSentiment": 77.42, "OnlineShopping": 94.48, "TNews": 51.24, @@ -10861,8 +11485,9 @@ "STS14": 83.32, "STS15": 87.5, "STS16": 86.39, - "STS17": 87.79, - "STS22": 67.36, + "STS17 (en-en)": 87.79, + "STS22 (en)": 66.4, + "STS22 (zh)": 67.36, "STSB": 81.37, "STSBenchmark": 87.35 } @@ -10952,16 +11577,16 @@ "accuracy": [ { "Model": "gtr-t5-base", - "AmazonCounterfactualClassification": 69.33, + "AmazonCounterfactualClassification (en)": 69.33, "AmazonPolarityClassification": 67.82, - "AmazonReviewsClassification": 38.48, + "AmazonReviewsClassification (en)": 38.48, "Banking77Classification": 79.26, "EmotionClassification": 42.2, "ImdbClassification": 65.99, - "MTOPDomainClassification": 92.42, - "MTOPIntentClassification": 62.44, - "MassiveIntentClassification": 67.05, - "MassiveScenarioClassification": 75.4, + "MTOPDomainClassification (en)": 92.42, + "MTOPIntentClassification (en)": 62.44, + "MassiveIntentClassification (en)": 67.05, + "MassiveScenarioClassification (en)": 75.4, "ToxicConversationsClassification": 66.6, "TweetSentimentExtractionClassification": 56.02 } @@ -11045,8 +11670,8 @@ "STS14": 74.64, "STS15": 84.85, "STS16": 81.57, - "STS17": 85.8, - "STS22": 66.17, + "STS17 (en-en)": 85.8, + "STS22 (en)": 66.17, "STSBenchmark": 79.58 } ] @@ -11074,16 +11699,134 @@ "accuracy": [ { "Model": "gtr-t5-large", - "AmazonCounterfactualClassification": 45.87, + "AmazonCounterfactualClassification (de)": 59.38, + "AmazonCounterfactualClassification (en)": 70.03, + "AmazonCounterfactualClassification (en-ext)": 69.86, + "AmazonCounterfactualClassification (ja)": 45.87, "AmazonPolarityClassification": 73.92, - "AmazonReviewsClassification": 21.83, + "AmazonReviewsClassification (de)": 33.06, + "AmazonReviewsClassification (en)": 37.21, + "AmazonReviewsClassification (es)": 34.0, + "AmazonReviewsClassification (fr)": 33.48, + "AmazonReviewsClassification (ja)": 21.78, + "AmazonReviewsClassification (zh)": 21.83, "Banking77Classification": 81.21, "EmotionClassification": 46.33, "ImdbClassification": 70.86, - "MTOPDomainClassification": 16.36, - "MTOPIntentClassification": 5.38, - "MassiveIntentClassification": 4.64, - "MassiveScenarioClassification": 8.16, + "MTOPDomainClassification (de)": 81.91, + "MTOPDomainClassification (en)": 94.01, + "MTOPDomainClassification (es)": 84.7, + "MTOPDomainClassification (fr)": 82.48, + "MTOPDomainClassification (hi)": 22.11, + "MTOPDomainClassification (th)": 16.36, + "MTOPIntentClassification (de)": 52.13, + "MTOPIntentClassification (en)": 63.86, + "MTOPIntentClassification (es)": 52.62, + "MTOPIntentClassification (fr)": 46.39, + "MTOPIntentClassification (hi)": 3.9, + "MTOPIntentClassification (th)": 5.38, + "MassiveIntentClassification (af)": 41.02, + "MassiveIntentClassification (am)": 2.34, + "MassiveIntentClassification (ar)": 4.87, + "MassiveIntentClassification (az)": 34.92, + "MassiveIntentClassification (bn)": 2.52, + "MassiveIntentClassification (cy)": 35.87, + "MassiveIntentClassification (da)": 45.3, + "MassiveIntentClassification (de)": 51.48, + "MassiveIntentClassification (el)": 10.0, + "MassiveIntentClassification (en)": 70.06, + "MassiveIntentClassification (es)": 53.3, + "MassiveIntentClassification (fa)": 3.59, + "MassiveIntentClassification (fi)": 37.35, + "MassiveIntentClassification (fr)": 54.83, + "MassiveIntentClassification (he)": 2.52, + "MassiveIntentClassification (hi)": 2.88, + "MassiveIntentClassification (hu)": 33.52, + "MassiveIntentClassification (hy)": 3.13, + "MassiveIntentClassification (id)": 40.11, + "MassiveIntentClassification (is)": 34.77, + "MassiveIntentClassification (it)": 51.21, + "MassiveIntentClassification (ja)": 4.75, + "MassiveIntentClassification (jv)": 35.6, + "MassiveIntentClassification (ka)": 2.71, + "MassiveIntentClassification (km)": 5.48, + "MassiveIntentClassification (kn)": 2.44, + "MassiveIntentClassification (ko)": 2.59, + "MassiveIntentClassification (lv)": 38.15, + "MassiveIntentClassification (ml)": 2.67, + "MassiveIntentClassification (mn)": 18.47, + "MassiveIntentClassification (ms)": 35.58, + "MassiveIntentClassification (my)": 4.35, + "MassiveIntentClassification (nb)": 43.78, + "MassiveIntentClassification (nl)": 45.96, + "MassiveIntentClassification (pl)": 39.08, + "MassiveIntentClassification (pt)": 52.27, + "MassiveIntentClassification (ro)": 46.39, + "MassiveIntentClassification (ru)": 16.82, + "MassiveIntentClassification (sl)": 37.3, + "MassiveIntentClassification (sq)": 41.73, + "MassiveIntentClassification (sv)": 43.51, + "MassiveIntentClassification (sw)": 35.97, + "MassiveIntentClassification (ta)": 1.52, + "MassiveIntentClassification (te)": 2.57, + "MassiveIntentClassification (th)": 3.94, + "MassiveIntentClassification (tl)": 41.03, + "MassiveIntentClassification (tr)": 33.75, + "MassiveIntentClassification (ur)": 2.57, + "MassiveIntentClassification (vi)": 25.23, + "MassiveIntentClassification (zh-CN)": 2.41, + "MassiveIntentClassification (zh-TW)": 4.64, + "MassiveScenarioClassification (af)": 51.48, + "MassiveScenarioClassification (am)": 7.74, + "MassiveScenarioClassification (ar)": 12.03, + "MassiveScenarioClassification (az)": 41.77, + "MassiveScenarioClassification (bn)": 8.07, + "MassiveScenarioClassification (cy)": 43.67, + "MassiveScenarioClassification (da)": 54.88, + "MassiveScenarioClassification (de)": 63.63, + "MassiveScenarioClassification (el)": 16.83, + "MassiveScenarioClassification (en)": 75.49, + "MassiveScenarioClassification (es)": 61.48, + "MassiveScenarioClassification (fa)": 6.48, + "MassiveScenarioClassification (fi)": 43.54, + "MassiveScenarioClassification (fr)": 64.06, + "MassiveScenarioClassification (he)": 8.03, + "MassiveScenarioClassification (hi)": 7.5, + "MassiveScenarioClassification (hu)": 42.59, + "MassiveScenarioClassification (hy)": 9.22, + "MassiveScenarioClassification (id)": 48.67, + "MassiveScenarioClassification (is)": 43.87, + "MassiveScenarioClassification (it)": 59.83, + "MassiveScenarioClassification (ja)": 5.62, + "MassiveScenarioClassification (jv)": 42.18, + "MassiveScenarioClassification (ka)": 7.52, + "MassiveScenarioClassification (km)": 9.55, + "MassiveScenarioClassification (kn)": 8.34, + "MassiveScenarioClassification (ko)": 6.11, + "MassiveScenarioClassification (lv)": 43.35, + "MassiveScenarioClassification (ml)": 7.28, + "MassiveScenarioClassification (mn)": 23.94, + "MassiveScenarioClassification (ms)": 45.18, + "MassiveScenarioClassification (my)": 9.33, + "MassiveScenarioClassification (nb)": 52.71, + "MassiveScenarioClassification (nl)": 57.02, + "MassiveScenarioClassification (pl)": 46.79, + "MassiveScenarioClassification (pt)": 59.45, + "MassiveScenarioClassification (ro)": 56.8, + "MassiveScenarioClassification (ru)": 25.85, + "MassiveScenarioClassification (sl)": 42.51, + "MassiveScenarioClassification (sq)": 50.41, + "MassiveScenarioClassification (sv)": 54.16, + "MassiveScenarioClassification (sw)": 43.02, + "MassiveScenarioClassification (ta)": 7.21, + "MassiveScenarioClassification (te)": 6.9, + "MassiveScenarioClassification (th)": 8.7, + "MassiveScenarioClassification (tl)": 51.76, + "MassiveScenarioClassification (tr)": 42.54, + "MassiveScenarioClassification (ur)": 9.32, + "MassiveScenarioClassification (vi)": 31.51, + "MassiveScenarioClassification (zh-CN)": 3.84, + "MassiveScenarioClassification (zh-TW)": 8.16, "ToxicConversationsClassification": 68.65, "TweetSentimentExtractionClassification": 54.09 } @@ -11167,8 +11910,35 @@ "STS14": 77.16, "STS15": 86.31, "STS16": 81.85, - "STS17": 47.48, - "STS22": 29.42, + "STS17 (ar-ar)": 10.19, + "STS17 (en-ar)": -5.77, + "STS17 (en-de)": 67.43, + "STS17 (en-en)": 83.93, + "STS17 (en-tr)": 8.75, + "STS17 (es-en)": 54.96, + "STS17 (es-es)": 82.74, + "STS17 (fr-en)": 60.5, + "STS17 (it-en)": 46.26, + "STS17 (ko-ko)": 8.96, + "STS17 (nl-en)": 47.48, + "STS22 (ar)": 34.97, + "STS22 (de)": 51.7, + "STS22 (de-en)": 48.76, + "STS22 (de-fr)": 57.5, + "STS22 (de-pl)": 32.76, + "STS22 (en)": 64.3, + "STS22 (es)": 57.49, + "STS22 (es-en)": 67.76, + "STS22 (es-it)": 57.18, + "STS22 (fr)": 78.7, + "STS22 (fr-pl)": 61.98, + "STS22 (it)": 67.67, + "STS22 (pl)": 30.68, + "STS22 (pl-en)": 54.17, + "STS22 (ru)": 15.36, + "STS22 (tr)": 58.12, + "STS22 (zh)": 27.32, + "STS22 (zh-en)": 29.42, "STSBenchmark": 77.6 } ] @@ -11193,8 +11963,122 @@ "f1": [ { "Model": "gtr-t5-xl", - "BUCC": 1.49, - "Tatoeba": 29.74 + "BUCC (de-en)": 90.99, + "BUCC (fr-en)": 88.55, + "BUCC (ru-en)": 2.07, + "BUCC (zh-en)": 1.49, + "Tatoeba (afr-eng)": 33.47, + "Tatoeba (amh-eng)": 0.01, + "Tatoeba (ang-eng)": 30.74, + "Tatoeba (ara-eng)": 0.47, + "Tatoeba (arq-eng)": 0.34, + "Tatoeba (arz-eng)": 0.14, + "Tatoeba (ast-eng)": 51.74, + "Tatoeba (awa-eng)": 0.49, + "Tatoeba (aze-eng)": 7.43, + "Tatoeba (bel-eng)": 3.45, + "Tatoeba (ben-eng)": 0.06, + "Tatoeba (ber-eng)": 5.79, + "Tatoeba (bos-eng)": 17.43, + "Tatoeba (bre-eng)": 5.69, + "Tatoeba (bul-eng)": 7.55, + "Tatoeba (cat-eng)": 48.06, + "Tatoeba (cbk-eng)": 54.56, + "Tatoeba (ceb-eng)": 8.72, + "Tatoeba (ces-eng)": 8.76, + "Tatoeba (cha-eng)": 27.56, + "Tatoeba (cmn-eng)": 2.26, + "Tatoeba (cor-eng)": 3.69, + "Tatoeba (csb-eng)": 13.18, + "Tatoeba (cym-eng)": 6.97, + "Tatoeba (dan-eng)": 47.36, + "Tatoeba (deu-eng)": 91.54, + "Tatoeba (dsb-eng)": 13.2, + "Tatoeba (dtp-eng)": 4.54, + "Tatoeba (ell-eng)": 0.55, + "Tatoeba (epo-eng)": 27.86, + "Tatoeba (est-eng)": 5.13, + "Tatoeba (eus-eng)": 10.23, + "Tatoeba (fao-eng)": 21.44, + "Tatoeba (fin-eng)": 6.62, + "Tatoeba (fra-eng)": 79.66, + "Tatoeba (fry-eng)": 32.92, + "Tatoeba (gla-eng)": 2.87, + "Tatoeba (gle-eng)": 3.26, + "Tatoeba (glg-eng)": 63.81, + "Tatoeba (gsw-eng)": 29.71, + "Tatoeba (heb-eng)": 0.33, + "Tatoeba (hin-eng)": 0.25, + "Tatoeba (hrv-eng)": 17.16, + "Tatoeba (hsb-eng)": 12.02, + "Tatoeba (hun-eng)": 7.21, + "Tatoeba (hye-eng)": 0.78, + "Tatoeba (ido-eng)": 40.83, + "Tatoeba (ile-eng)": 54.95, + "Tatoeba (ina-eng)": 72.28, + "Tatoeba (ind-eng)": 30.95, + "Tatoeba (isl-eng)": 11.29, + "Tatoeba (ita-eng)": 73.83, + "Tatoeba (jav-eng)": 8.66, + "Tatoeba (jpn-eng)": 0.61, + "Tatoeba (kab-eng)": 1.78, + "Tatoeba (kat-eng)": 0.79, + "Tatoeba (kaz-eng)": 0.95, + "Tatoeba (khm-eng)": 0.49, + "Tatoeba (kor-eng)": 1.87, + "Tatoeba (kur-eng)": 10.91, + "Tatoeba (kzj-eng)": 5.72, + "Tatoeba (lat-eng)": 18.24, + "Tatoeba (lfn-eng)": 43.49, + "Tatoeba (lit-eng)": 7.13, + "Tatoeba (lvs-eng)": 7.04, + "Tatoeba (mal-eng)": 0.44, + "Tatoeba (mar-eng)": 0.03, + "Tatoeba (max-eng)": 18.99, + "Tatoeba (mhr-eng)": 1.11, + "Tatoeba (mkd-eng)": 2.49, + "Tatoeba (mon-eng)": 2.01, + "Tatoeba (nds-eng)": 39.96, + "Tatoeba (nld-eng)": 58.86, + "Tatoeba (nno-eng)": 29.07, + "Tatoeba (nob-eng)": 40.25, + "Tatoeba (nov-eng)": 50.19, + "Tatoeba (oci-eng)": 30.72, + "Tatoeba (orv-eng)": 0.85, + "Tatoeba (pam-eng)": 7.21, + "Tatoeba (pes-eng)": 0.53, + "Tatoeba (pms-eng)": 31.07, + "Tatoeba (pol-eng)": 18.06, + "Tatoeba (por-eng)": 81.92, + "Tatoeba (ron-eng)": 62.6, + "Tatoeba (rus-eng)": 22.24, + "Tatoeba (slk-eng)": 10.59, + "Tatoeba (slv-eng)": 11.4, + "Tatoeba (spa-eng)": 85.78, + "Tatoeba (sqi-eng)": 14.92, + "Tatoeba (srp-eng)": 9.87, + "Tatoeba (swe-eng)": 55.08, + "Tatoeba (swg-eng)": 32.66, + "Tatoeba (swh-eng)": 7.64, + "Tatoeba (tam-eng)": 0.49, + "Tatoeba (tat-eng)": 1.28, + "Tatoeba (tel-eng)": 0.45, + "Tatoeba (tgl-eng)": 23.63, + "Tatoeba (tha-eng)": 0.61, + "Tatoeba (tuk-eng)": 5.71, + "Tatoeba (tur-eng)": 8.25, + "Tatoeba (tzl-eng)": 28.4, + "Tatoeba (uig-eng)": 0.57, + "Tatoeba (ukr-eng)": 5.69, + "Tatoeba (urd-eng)": 0.0, + "Tatoeba (uzb-eng)": 4.19, + "Tatoeba (vie-eng)": 9.07, + "Tatoeba (war-eng)": 12.31, + "Tatoeba (wuu-eng)": 1.38, + "Tatoeba (xho-eng)": 7.6, + "Tatoeba (yid-eng)": 0.41, + "Tatoeba (yue-eng)": 1.31, + "Tatoeba (zsm-eng)": 29.74 } ] }, @@ -11202,16 +12086,134 @@ "accuracy": [ { "Model": "gtr-t5-xl", - "AmazonCounterfactualClassification": 50.59, + "AmazonCounterfactualClassification (de)": 59.79, + "AmazonCounterfactualClassification (en)": 68.6, + "AmazonCounterfactualClassification (en-ext)": 69.03, + "AmazonCounterfactualClassification (ja)": 50.59, "AmazonPolarityClassification": 74.58, - "AmazonReviewsClassification": 21.89, + "AmazonReviewsClassification (de)": 35.06, + "AmazonReviewsClassification (en)": 38.2, + "AmazonReviewsClassification (es)": 37.18, + "AmazonReviewsClassification (fr)": 35.48, + "AmazonReviewsClassification (ja)": 22.24, + "AmazonReviewsClassification (zh)": 21.89, "Banking77Classification": 82.22, "EmotionClassification": 45.54, "ImdbClassification": 68.15, - "MTOPDomainClassification": 15.87, - "MTOPIntentClassification": 5.55, - "MassiveIntentClassification": 3.35, - "MassiveScenarioClassification": 8.77, + "MTOPDomainClassification (de)": 85.42, + "MTOPDomainClassification (en)": 93.6, + "MTOPDomainClassification (es)": 88.2, + "MTOPDomainClassification (fr)": 85.05, + "MTOPDomainClassification (hi)": 21.74, + "MTOPDomainClassification (th)": 15.87, + "MTOPIntentClassification (de)": 55.75, + "MTOPIntentClassification (en)": 65.93, + "MTOPIntentClassification (es)": 57.73, + "MTOPIntentClassification (fr)": 51.07, + "MTOPIntentClassification (hi)": 3.19, + "MTOPIntentClassification (th)": 5.55, + "MassiveIntentClassification (af)": 42.6, + "MassiveIntentClassification (am)": 2.12, + "MassiveIntentClassification (ar)": 4.64, + "MassiveIntentClassification (az)": 35.05, + "MassiveIntentClassification (bn)": 2.84, + "MassiveIntentClassification (cy)": 36.19, + "MassiveIntentClassification (da)": 48.42, + "MassiveIntentClassification (de)": 55.49, + "MassiveIntentClassification (el)": 10.14, + "MassiveIntentClassification (en)": 70.23, + "MassiveIntentClassification (es)": 56.72, + "MassiveIntentClassification (fa)": 3.54, + "MassiveIntentClassification (fi)": 37.13, + "MassiveIntentClassification (fr)": 57.67, + "MassiveIntentClassification (he)": 2.56, + "MassiveIntentClassification (hi)": 3.24, + "MassiveIntentClassification (hu)": 34.22, + "MassiveIntentClassification (hy)": 3.01, + "MassiveIntentClassification (id)": 46.54, + "MassiveIntentClassification (is)": 34.77, + "MassiveIntentClassification (it)": 54.13, + "MassiveIntentClassification (ja)": 4.27, + "MassiveIntentClassification (jv)": 36.97, + "MassiveIntentClassification (ka)": 2.72, + "MassiveIntentClassification (km)": 5.35, + "MassiveIntentClassification (kn)": 3.17, + "MassiveIntentClassification (ko)": 2.64, + "MassiveIntentClassification (lv)": 36.32, + "MassiveIntentClassification (ml)": 3.18, + "MassiveIntentClassification (mn)": 22.85, + "MassiveIntentClassification (ms)": 42.87, + "MassiveIntentClassification (my)": 4.04, + "MassiveIntentClassification (nb)": 45.87, + "MassiveIntentClassification (nl)": 49.53, + "MassiveIntentClassification (pl)": 42.64, + "MassiveIntentClassification (pt)": 57.03, + "MassiveIntentClassification (ro)": 49.95, + "MassiveIntentClassification (ru)": 36.58, + "MassiveIntentClassification (sl)": 39.44, + "MassiveIntentClassification (sq)": 41.78, + "MassiveIntentClassification (sv)": 47.95, + "MassiveIntentClassification (sw)": 35.85, + "MassiveIntentClassification (ta)": 2.32, + "MassiveIntentClassification (te)": 2.2, + "MassiveIntentClassification (th)": 3.74, + "MassiveIntentClassification (tl)": 43.12, + "MassiveIntentClassification (tr)": 35.24, + "MassiveIntentClassification (ur)": 3.0, + "MassiveIntentClassification (vi)": 30.01, + "MassiveIntentClassification (zh-CN)": 1.72, + "MassiveIntentClassification (zh-TW)": 3.35, + "MassiveScenarioClassification (af)": 52.54, + "MassiveScenarioClassification (am)": 6.3, + "MassiveScenarioClassification (ar)": 11.96, + "MassiveScenarioClassification (az)": 40.17, + "MassiveScenarioClassification (bn)": 8.29, + "MassiveScenarioClassification (cy)": 42.24, + "MassiveScenarioClassification (da)": 57.28, + "MassiveScenarioClassification (de)": 68.09, + "MassiveScenarioClassification (el)": 16.66, + "MassiveScenarioClassification (en)": 75.94, + "MassiveScenarioClassification (es)": 64.32, + "MassiveScenarioClassification (fa)": 6.9, + "MassiveScenarioClassification (fi)": 43.96, + "MassiveScenarioClassification (fr)": 66.72, + "MassiveScenarioClassification (he)": 7.51, + "MassiveScenarioClassification (hi)": 7.82, + "MassiveScenarioClassification (hu)": 42.16, + "MassiveScenarioClassification (hy)": 9.33, + "MassiveScenarioClassification (id)": 53.54, + "MassiveScenarioClassification (is)": 42.84, + "MassiveScenarioClassification (it)": 62.44, + "MassiveScenarioClassification (ja)": 7.29, + "MassiveScenarioClassification (jv)": 43.13, + "MassiveScenarioClassification (ka)": 7.63, + "MassiveScenarioClassification (km)": 9.08, + "MassiveScenarioClassification (kn)": 8.1, + "MassiveScenarioClassification (ko)": 6.35, + "MassiveScenarioClassification (lv)": 40.24, + "MassiveScenarioClassification (ml)": 7.65, + "MassiveScenarioClassification (mn)": 27.98, + "MassiveScenarioClassification (ms)": 52.41, + "MassiveScenarioClassification (my)": 9.21, + "MassiveScenarioClassification (nb)": 54.44, + "MassiveScenarioClassification (nl)": 60.35, + "MassiveScenarioClassification (pl)": 49.97, + "MassiveScenarioClassification (pt)": 62.78, + "MassiveScenarioClassification (ro)": 59.62, + "MassiveScenarioClassification (ru)": 43.44, + "MassiveScenarioClassification (sl)": 44.79, + "MassiveScenarioClassification (sq)": 50.84, + "MassiveScenarioClassification (sv)": 58.21, + "MassiveScenarioClassification (sw)": 44.63, + "MassiveScenarioClassification (ta)": 7.95, + "MassiveScenarioClassification (te)": 7.5, + "MassiveScenarioClassification (th)": 8.79, + "MassiveScenarioClassification (tl)": 53.54, + "MassiveScenarioClassification (tr)": 42.47, + "MassiveScenarioClassification (ur)": 9.58, + "MassiveScenarioClassification (vi)": 34.68, + "MassiveScenarioClassification (zh-CN)": 5.21, + "MassiveScenarioClassification (zh-TW)": 8.77, "ToxicConversationsClassification": 67.56, "TweetSentimentExtractionClassification": 54.77 } @@ -11295,8 +12297,35 @@ "STS14": 77.07, "STS15": 86.01, "STS16": 82.23, - "STS17": 56.91, - "STS22": 28.85, + "STS17 (ar-ar)": 9.06, + "STS17 (en-ar)": -3.22, + "STS17 (en-de)": 70.38, + "STS17 (en-en)": 84.9, + "STS17 (en-tr)": 17.17, + "STS17 (es-en)": 60.24, + "STS17 (es-es)": 81.93, + "STS17 (fr-en)": 62.17, + "STS17 (it-en)": 59.11, + "STS17 (ko-ko)": 8.9, + "STS17 (nl-en)": 56.91, + "STS22 (ar)": 37.66, + "STS22 (de)": 50.58, + "STS22 (de-en)": 53.63, + "STS22 (de-fr)": 55.72, + "STS22 (de-pl)": 27.99, + "STS22 (en)": 66.61, + "STS22 (es)": 59.14, + "STS22 (es-en)": 69.99, + "STS22 (es-it)": 60.94, + "STS22 (fr)": 79.43, + "STS22 (fr-pl)": 61.98, + "STS22 (it)": 67.14, + "STS22 (pl)": 33.74, + "STS22 (pl-en)": 60.18, + "STS22 (ru)": 32.69, + "STS22 (tr)": 55.79, + "STS22 (zh)": 31.16, + "STS22 (zh-en)": 28.85, "STSBenchmark": 77.65 } ] @@ -11324,16 +12353,16 @@ "accuracy": [ { "Model": "gtr-t5-xxl", - "AmazonCounterfactualClassification": 67.3, + "AmazonCounterfactualClassification (en)": 67.3, "AmazonPolarityClassification": 75.05, - "AmazonReviewsClassification": 37.3, + "AmazonReviewsClassification (en)": 37.3, "Banking77Classification": 82.32, "EmotionClassification": 43.19, "ImdbClassification": 70.8, - "MTOPDomainClassification": 93.84, - "MTOPIntentClassification": 67.71, - "MassiveIntentClassification": 70.61, - "MassiveScenarioClassification": 77.77, + "MTOPDomainClassification (en)": 93.84, + "MTOPIntentClassification (en)": 67.71, + "MassiveIntentClassification (en)": 70.61, + "MassiveScenarioClassification (en)": 77.77, "ToxicConversationsClassification": 68.48, "TweetSentimentExtractionClassification": 54.54 } @@ -11417,8 +12446,8 @@ "STS14": 78.24, "STS15": 86.26, "STS16": 81.61, - "STS17": 85.18, - "STS22": 65.76, + "STS17 (en-en)": 85.18, + "STS22 (en)": 65.76, "STSBenchmark": 77.73 } ] @@ -11448,8 +12477,8 @@ "Model": "herbert-base-retrieval-v2", "AllegroReviews": 34.11, "CBD": 68.35, - "MassiveIntentClassification": 65.53, - "MassiveScenarioClassification": 68.51, + "MassiveIntentClassification (pl)": 65.53, + "MassiveScenarioClassification (pl)": 68.51, "PAC": 68.4, "PolEmo2.0-IN": 64.18, "PolEmo2.0-OUT": 45.73 @@ -11509,7 +12538,7 @@ "Model": "herbert-base-retrieval-v2", "CDSC-R": 86.18, "SICK-R-PL": 64.67, - "STS22": 39.73 + "STS22 (pl)": 39.73 } ] }, @@ -11736,8 +12765,122 @@ "f1": [ { "Model": "komninos", - "BUCC": 0.05, - "Tatoeba": 7.27 + "BUCC (de-en)": 0.18, + "BUCC (fr-en)": 0.08, + "BUCC (ru-en)": 0.15, + "BUCC (zh-en)": 0.05, + "Tatoeba (afr-eng)": 4.82, + "Tatoeba (amh-eng)": 1.18, + "Tatoeba (ang-eng)": 8.54, + "Tatoeba (ara-eng)": 0.63, + "Tatoeba (arq-eng)": 0.4, + "Tatoeba (arz-eng)": 0.63, + "Tatoeba (ast-eng)": 11.69, + "Tatoeba (awa-eng)": 0.0, + "Tatoeba (aze-eng)": 3.22, + "Tatoeba (bel-eng)": 1.75, + "Tatoeba (ben-eng)": 0.2, + "Tatoeba (ber-eng)": 7.0, + "Tatoeba (bos-eng)": 9.31, + "Tatoeba (bre-eng)": 4.17, + "Tatoeba (bul-eng)": 1.29, + "Tatoeba (cat-eng)": 7.73, + "Tatoeba (cbk-eng)": 5.61, + "Tatoeba (ceb-eng)": 4.88, + "Tatoeba (ces-eng)": 3.55, + "Tatoeba (cha-eng)": 19.29, + "Tatoeba (cmn-eng)": 0.5, + "Tatoeba (cor-eng)": 4.15, + "Tatoeba (csb-eng)": 5.69, + "Tatoeba (cym-eng)": 8.4, + "Tatoeba (dan-eng)": 6.99, + "Tatoeba (deu-eng)": 3.67, + "Tatoeba (dsb-eng)": 5.33, + "Tatoeba (dtp-eng)": 4.25, + "Tatoeba (ell-eng)": 0.63, + "Tatoeba (epo-eng)": 2.45, + "Tatoeba (est-eng)": 2.69, + "Tatoeba (eus-eng)": 4.69, + "Tatoeba (fao-eng)": 7.61, + "Tatoeba (fin-eng)": 3.36, + "Tatoeba (fra-eng)": 7.0, + "Tatoeba (fry-eng)": 12.36, + "Tatoeba (gla-eng)": 3.07, + "Tatoeba (gle-eng)": 4.81, + "Tatoeba (glg-eng)": 8.12, + "Tatoeba (gsw-eng)": 18.87, + "Tatoeba (heb-eng)": 0.68, + "Tatoeba (hin-eng)": 0.1, + "Tatoeba (hrv-eng)": 5.41, + "Tatoeba (hsb-eng)": 6.32, + "Tatoeba (hun-eng)": 3.42, + "Tatoeba (hye-eng)": 0.97, + "Tatoeba (ido-eng)": 7.1, + "Tatoeba (ile-eng)": 13.61, + "Tatoeba (ina-eng)": 8.57, + "Tatoeba (ind-eng)": 7.26, + "Tatoeba (isl-eng)": 4.09, + "Tatoeba (ita-eng)": 5.54, + "Tatoeba (jav-eng)": 11.43, + "Tatoeba (jpn-eng)": 0.2, + "Tatoeba (kab-eng)": 2.71, + "Tatoeba (kat-eng)": 1.11, + "Tatoeba (kaz-eng)": 1.17, + "Tatoeba (khm-eng)": 0.55, + "Tatoeba (kor-eng)": 0.5, + "Tatoeba (kur-eng)": 8.55, + "Tatoeba (kzj-eng)": 4.61, + "Tatoeba (lat-eng)": 4.07, + "Tatoeba (lfn-eng)": 2.83, + "Tatoeba (lit-eng)": 0.95, + "Tatoeba (lvs-eng)": 3.25, + "Tatoeba (mal-eng)": 0.29, + "Tatoeba (mar-eng)": 0.2, + "Tatoeba (max-eng)": 14.53, + "Tatoeba (mhr-eng)": 0.2, + "Tatoeba (mkd-eng)": 0.2, + "Tatoeba (mon-eng)": 1.1, + "Tatoeba (nds-eng)": 10.37, + "Tatoeba (nld-eng)": 9.5, + "Tatoeba (nno-eng)": 4.49, + "Tatoeba (nob-eng)": 4.95, + "Tatoeba (nov-eng)": 14.53, + "Tatoeba (oci-eng)": 5.8, + "Tatoeba (orv-eng)": 0.24, + "Tatoeba (pam-eng)": 6.65, + "Tatoeba (pes-eng)": 0.5, + "Tatoeba (pms-eng)": 8.05, + "Tatoeba (pol-eng)": 5.13, + "Tatoeba (por-eng)": 5.87, + "Tatoeba (ron-eng)": 6.76, + "Tatoeba (rus-eng)": 0.2, + "Tatoeba (slk-eng)": 4.23, + "Tatoeba (slv-eng)": 6.05, + "Tatoeba (spa-eng)": 5.03, + "Tatoeba (sqi-eng)": 4.36, + "Tatoeba (srp-eng)": 1.77, + "Tatoeba (swe-eng)": 6.72, + "Tatoeba (swg-eng)": 8.54, + "Tatoeba (swh-eng)": 11.49, + "Tatoeba (tam-eng)": 1.3, + "Tatoeba (tat-eng)": 0.77, + "Tatoeba (tel-eng)": 0.85, + "Tatoeba (tgl-eng)": 2.61, + "Tatoeba (tha-eng)": 0.69, + "Tatoeba (tuk-eng)": 5.76, + "Tatoeba (tur-eng)": 5.24, + "Tatoeba (tzl-eng)": 15.51, + "Tatoeba (uig-eng)": 0.6, + "Tatoeba (ukr-eng)": 1.23, + "Tatoeba (urd-eng)": 0.4, + "Tatoeba (uzb-eng)": 4.73, + "Tatoeba (vie-eng)": 6.55, + "Tatoeba (war-eng)": 4.12, + "Tatoeba (wuu-eng)": 0.2, + "Tatoeba (xho-eng)": 4.33, + "Tatoeba (yid-eng)": 0.59, + "Tatoeba (yue-eng)": 0.5, + "Tatoeba (zsm-eng)": 7.27 } ] }, @@ -11745,16 +12888,16 @@ "accuracy": [ { "Model": "komninos", - "AmazonCounterfactualClassification": 60.54, + "AmazonCounterfactualClassification (en)": 60.54, "AmazonPolarityClassification": 59.59, - "AmazonReviewsClassification": 31.01, + "AmazonReviewsClassification (en)": 31.01, "Banking77Classification": 67.05, "EmotionClassification": 33.18, "ImdbClassification": 63.98, - "MTOPDomainClassification": 78.57, - "MTOPIntentClassification": 57.07, - "MassiveIntentClassification": 57.21, - "MassiveScenarioClassification": 66.11, + "MTOPDomainClassification (en)": 78.57, + "MTOPIntentClassification (en)": 57.07, + "MassiveIntentClassification (en)": 57.21, + "MassiveScenarioClassification (en)": 66.11, "ToxicConversationsClassification": 67.76, "TweetSentimentExtractionClassification": 49.68 } @@ -11842,8 +12985,35 @@ "STS14": 63.56, "STS15": 74.08, "STS16": 64.6, - "STS17": 0.44, - "STS22": 14.05, + "STS17 (ar-ar)": 13.78, + "STS17 (en-ar)": 9.08, + "STS17 (en-de)": -3.11, + "STS17 (en-en)": 76.91, + "STS17 (en-tr)": -0.45, + "STS17 (es-en)": -8.18, + "STS17 (es-es)": 48.23, + "STS17 (fr-en)": 5.81, + "STS17 (it-en)": 3.64, + "STS17 (ko-ko)": 2.54, + "STS17 (nl-en)": 0.44, + "STS22 (ar)": 32.42, + "STS22 (de)": 33.04, + "STS22 (de-en)": 28.65, + "STS22 (de-fr)": 14.77, + "STS22 (de-pl)": 11.21, + "STS22 (en)": 53.89, + "STS22 (es)": 48.53, + "STS22 (es-en)": 26.97, + "STS22 (es-it)": 41.1, + "STS22 (fr)": 49.43, + "STS22 (fr-pl)": 39.44, + "STS22 (it)": 57.77, + "STS22 (pl)": 12.47, + "STS22 (pl-en)": 45.55, + "STS22 (ru)": 19.44, + "STS22 (tr)": 47.38, + "STS22 (zh)": 4.78, + "STS22 (zh-en)": 14.05, "STSBenchmark": 61.55 } ] @@ -11910,11 +13080,11 @@ "accuracy": [ { "Model": "luotuo-bert-medium", - "AmazonReviewsClassification": 34.46, + "AmazonReviewsClassification (zh)": 34.46, "IFlyTek": 41.75, "JDReview": 79.68, - "MassiveIntentClassification": 57.47, - "MassiveScenarioClassification": 65.32, + "MassiveIntentClassification (zh-CN)": 57.47, + "MassiveScenarioClassification (zh-CN)": 65.32, "MultilingualSentiment": 61.21, "OnlineShopping": 84.3, "TNews": 45.22, @@ -11983,7 +13153,7 @@ "LCQMC": 66.74, "PAWSX": 12.31, "QBQTC": 27.2, - "STS22": 66.4, + "STS22 (zh)": 66.4, "STSB": 73.22 } ] @@ -12006,11 +13176,11 @@ "accuracy": [ { "Model": "m3e-base", - "AmazonReviewsClassification": 43.02, + "AmazonReviewsClassification (zh)": 43.02, "IFlyTek": 44.42, "JDReview": 85.33, - "MassiveIntentClassification": 68.4, - "MassiveScenarioClassification": 74.6, + "MassiveIntentClassification (zh-CN)": 68.4, + "MassiveScenarioClassification (zh-CN)": 74.6, "MultilingualSentiment": 71.9, "OnlineShopping": 87.77, "TNews": 48.28, @@ -12079,7 +13249,7 @@ "LCQMC": 74.88, "PAWSX": 12.19, "QBQTC": 32.07, - "STS22": 66.73, + "STS22 (zh)": 66.73, "STSB": 76.97 } ] @@ -12102,11 +13272,11 @@ "accuracy": [ { "Model": "m3e-large", - "AmazonReviewsClassification": 44.44, + "AmazonReviewsClassification (zh)": 44.44, "IFlyTek": 43.96, "JDReview": 86.92, - "MassiveIntentClassification": 67.23, - "MassiveScenarioClassification": 74.88, + "MassiveIntentClassification (zh-CN)": 67.23, + "MassiveScenarioClassification (zh-CN)": 74.88, "MultilingualSentiment": 72.47, "OnlineShopping": 89.59, "TNews": 48.26, @@ -12175,7 +13345,7 @@ "LCQMC": 74.2, "PAWSX": 15.95, "QBQTC": 32.65, - "STS22": 62.91, + "STS22 (zh)": 62.91, "STSB": 74.16 } ] @@ -12237,12 +13407,12 @@ "accuracy": [ { "Model": "mistral-embed", - "AmazonReviewsClassification": 41.59, - "MTOPDomainClassification": 90.05, - "MTOPIntentClassification": 66.09, - "MasakhaNEWSClassification": 81.4, - "MassiveIntentClassification": 62.83, - "MassiveScenarioClassification": 69.71 + "AmazonReviewsClassification (fr)": 41.59, + "MTOPDomainClassification (fr)": 90.05, + "MTOPIntentClassification (fr)": 66.09, + "MasakhaNEWSClassification (fra)": 81.4, + "MassiveIntentClassification (fr)": 62.83, + "MassiveScenarioClassification (fr)": 69.71 } ] }, @@ -12255,8 +13425,8 @@ "HALClusteringS2S": 26.17, "MLSUMClusteringP2P": 45.28, "MLSUMClusteringS2S": 42.74, - "MasakhaNEWSClusteringP2P": 48.13, - "MasakhaNEWSClusteringS2S": 39.62 + "MasakhaNEWSClusteringP2P (fra)": 48.13, + "MasakhaNEWSClusteringS2S (fra)": 39.62 } ] }, @@ -12264,13 +13434,13 @@ "max_ap": [ { "Model": "mistral-embed", - "OpusparcusPC": 92.61, - "PawsXPairClassification": 62.02 + "OpusparcusPC (fr)": 92.61, + "PawsXPairClassification (fr)": 62.02 }, { "Model": "mistral-embed", - "OpusparcusPC": 92.64, - "PawsXPairClassification": 62.05 + "OpusparcusPC (fr)": 92.64, + "PawsXPairClassification (fr)": 62.05 } ] }, @@ -12297,9 +13467,9 @@ "LegalBenchCorporateLobbying": 94.11, "LegalQuAD": 47.17, "LegalSummarization": 67.39, - "MintakaRetrieval": 21.73, + "MintakaRetrieval (fr)": 21.73, "SyntecRetrieval": 78.77, - "XPQARetrieval": 74.24 + "XPQARetrieval (fr)": 74.24 } ] }, @@ -12308,8 +13478,8 @@ { "Model": "mistral-embed", "SICKFr": 76.21, - "STS22": 82.74, - "STSBenchmarkMultilingualSTS": 79.72 + "STS22 (fr)": 82.74, + "STSBenchmarkMultilingualSTS (fr)": 79.72 } ] }, @@ -12453,16 +13623,16 @@ "accuracy": [ { "Model": "msmarco-bert-co-condensor", - "AmazonCounterfactualClassification": 64.06, + "AmazonCounterfactualClassification (en)": 64.06, "AmazonPolarityClassification": 66.88, - "AmazonReviewsClassification": 34.85, + "AmazonReviewsClassification (en)": 34.85, "Banking77Classification": 82.35, "EmotionClassification": 41.91, "ImdbClassification": 60.17, - "MTOPDomainClassification": 91.34, - "MTOPIntentClassification": 71.07, - "MassiveIntentClassification": 70.4, - "MassiveScenarioClassification": 73.73, + "MTOPDomainClassification (en)": 91.34, + "MTOPIntentClassification (en)": 71.07, + "MassiveIntentClassification (en)": 70.4, + "MassiveScenarioClassification (en)": 73.73, "ToxicConversationsClassification": 64.01, "TweetSentimentExtractionClassification": 55.74 } @@ -12546,8 +13716,8 @@ "STS14": 74.02, "STS15": 82.57, "STS16": 79.78, - "STS17": 85.94, - "STS22": 67.54, + "STS17 (en-en)": 85.94, + "STS22 (en)": 67.54, "STSBenchmark": 76.97 } ] @@ -12575,12 +13745,12 @@ "accuracy": [ { "Model": "multi-qa-MiniLM-L6-cos-v1", - "AmazonReviewsClassification": 27.05, - "MTOPDomainClassification": 72.97, - "MTOPIntentClassification": 37.18, - "MasakhaNEWSClassification": 75.62, - "MassiveIntentClassification": 42.64, - "MassiveScenarioClassification": 49.92 + "AmazonReviewsClassification (fr)": 27.05, + "MTOPDomainClassification (fr)": 72.97, + "MTOPIntentClassification (fr)": 37.18, + "MasakhaNEWSClassification (fra)": 75.62, + "MassiveIntentClassification (fr)": 42.64, + "MassiveScenarioClassification (fr)": 49.92 } ] }, @@ -12593,8 +13763,8 @@ "HALClusteringS2S": 12.49, "MLSUMClusteringP2P": 35.15, "MLSUMClusteringS2S": 25.95, - "MasakhaNEWSClusteringP2P": 53.73, - "MasakhaNEWSClusteringS2S": 27.27 + "MasakhaNEWSClusteringP2P (fra)": 53.73, + "MasakhaNEWSClusteringS2S (fra)": 27.27 } ] }, @@ -12602,13 +13772,13 @@ "max_ap": [ { "Model": "multi-qa-MiniLM-L6-cos-v1", - "OpusparcusPC": 88.07, - "PawsXPairClassification": 57.36 + "OpusparcusPC (fr)": 88.07, + "PawsXPairClassification (fr)": 57.36 }, { "Model": "multi-qa-MiniLM-L6-cos-v1", - "OpusparcusPC": 88.07, - "PawsXPairClassification": 57.48 + "OpusparcusPC (fr)": 88.07, + "PawsXPairClassification (fr)": 57.48 } ] }, @@ -12627,9 +13797,9 @@ "Model": "multi-qa-MiniLM-L6-cos-v1", "AlloprofRetrieval": 30.23, "BSARDRetrieval": 0.0, - "MintakaRetrieval": 16.31, + "MintakaRetrieval (fr)": 16.31, "SyntecRetrieval": 58.07, - "XPQARetrieval": 48.83 + "XPQARetrieval (fr)": 48.83 } ] }, @@ -12638,8 +13808,8 @@ { "Model": "multi-qa-MiniLM-L6-cos-v1", "SICKFr": 62.11, - "STS22": 74.62, - "STSBenchmarkMultilingualSTS": 63.85 + "STS22 (fr)": 74.62, + "STSBenchmarkMultilingualSTS (fr)": 63.85 } ] }, @@ -12789,11 +13959,10 @@ "AmazonCounterfactualClassification (de)": 70.81, "AmazonCounterfactualClassification (ja)": 72.02, "AmazonPolarityClassification": 91.76, - "AmazonReviewsClassification": 40.94, + "AmazonReviewsClassification (fr)": 41.55, "AmazonReviewsClassification (en)": 47.54, "AmazonReviewsClassification (de)": 44.37, "AmazonReviewsClassification (es)": 43.38, - "AmazonReviewsClassification (fr)": 41.55, "AmazonReviewsClassification (ja)": 39.57, "AmazonReviewsClassification (zh)": 38.34, "AngryTweetsClassification": 56.28, @@ -12810,24 +13979,21 @@ "JDReview": 75.72, "KinopoiskClassification": 50.89, "LccSentimentClassification": 60.13, - "MTOPDomainClassification": 84.79, + "MTOPDomainClassification (fr)": 82.88, "MTOPDomainClassification (en)": 90.9, "MTOPDomainClassification (de)": 87.94, "MTOPDomainClassification (es)": 85.96, - "MTOPDomainClassification (fr)": 82.88, "MTOPDomainClassification (hi)": 83.92, "MTOPDomainClassification (th)": 83.94, - "MTOPIntentClassification": 55.51, + "MTOPIntentClassification (fr)": 52.23, "MTOPIntentClassification (en)": 61.6, "MTOPIntentClassification (de)": 61.05, "MTOPIntentClassification (es)": 55.36, - "MTOPIntentClassification (fr)": 52.23, "MTOPIntentClassification (hi)": 53.93, "MTOPIntentClassification (th)": 58.69, - "MasakhaNEWSClassification": 79.69, + "MasakhaNEWSClassification (fra)": 76.35, "MasakhaNEWSClassification (amh)": 83.8, "MasakhaNEWSClassification (eng)": 76.49, - "MasakhaNEWSClassification (fra)": 76.35, "MasakhaNEWSClassification (hau)": 74.63, "MasakhaNEWSClassification (ibo)": 64.59, "MasakhaNEWSClassification (lin)": 70.57, @@ -12841,23 +14007,22 @@ "MasakhaNEWSClassification (tir)": 67.1, "MasakhaNEWSClassification (xho)": 76.03, "MasakhaNEWSClassification (yor)": 72.75, - "MassiveIntentClassification": 61.04, + "MassiveIntentClassification (da)": 60.69, + "MassiveIntentClassification (nb)": 60.06, + "MassiveIntentClassification (sv)": 62.43, + "MassiveIntentClassification (pl)": 60.98, "MassiveIntentClassification (es)": 61.13, "MassiveIntentClassification (de)": 59.82, "MassiveIntentClassification (mn)": 46.8, "MassiveIntentClassification (sq)": 51.07, - "MassiveIntentClassification (sv)": 62.43, "MassiveIntentClassification (tr)": 60.69, "MassiveIntentClassification (hy)": 48.77, "MassiveIntentClassification (az)": 51.36, - "MassiveIntentClassification (da)": 60.69, "MassiveIntentClassification (my)": 46.67, "MassiveIntentClassification (th)": 59.63, "MassiveIntentClassification (sl)": 53.84, "MassiveIntentClassification (sw)": 45.24, - "MassiveIntentClassification (pl)": 60.98, "MassiveIntentClassification (bn)": 51.69, - "MassiveIntentClassification (nb)": 60.06, "MassiveIntentClassification (fi)": 58.91, "MassiveIntentClassification (jv)": 43.23, "MassiveIntentClassification (id)": 58.7, @@ -12893,11 +14058,13 @@ "MassiveIntentClassification (ml)": 53.75, "MassiveIntentClassification (zh-TW)": 56.4, "MassiveIntentClassification (ru)": 62.78, - "MassiveScenarioClassification": 66.11, + "MassiveScenarioClassification (da)": 67.97, + "MassiveScenarioClassification (nb)": 66.57, + "MassiveScenarioClassification (sv)": 69.35, + "MassiveScenarioClassification (pl)": 66.12, "MassiveScenarioClassification (az)": 55.15, "MassiveScenarioClassification (fr)": 67.37, "MassiveScenarioClassification (ko)": 67.9, - "MassiveScenarioClassification (da)": 67.97, "MassiveScenarioClassification (fi)": 64.94, "MassiveScenarioClassification (ro)": 63.5, "MassiveScenarioClassification (zh-TW)": 63.73, @@ -12926,8 +14093,6 @@ "MassiveScenarioClassification (hy)": 53.63, "MassiveScenarioClassification (tr)": 65.18, "MassiveScenarioClassification (ja)": 69.89, - "MassiveScenarioClassification (nb)": 66.57, - "MassiveScenarioClassification (sv)": 69.35, "MassiveScenarioClassification (id)": 63.6, "MassiveScenarioClassification (th)": 67.37, "MassiveScenarioClassification (de)": 68.4, @@ -12941,7 +14106,6 @@ "MassiveScenarioClassification (af)": 58.95, "MassiveScenarioClassification (el)": 65.38, "MassiveScenarioClassification (km)": 38.45, - "MassiveScenarioClassification (pl)": 66.12, "MassiveScenarioClassification (ur)": 56.74, "MassiveScenarioClassification (hi)": 62.91, "MassiveScenarioClassification (zh-CN)": 70.24, @@ -12984,10 +14148,9 @@ "MLSUMClusteringP2P (ru)": 43.47, "MLSUMClusteringS2S": 38.53, "MLSUMClusteringS2S (ru)": 40.87, - "MasakhaNEWSClusteringP2P": 47.91, + "MasakhaNEWSClusteringP2P (fra)": 58.28, "MasakhaNEWSClusteringP2P (amh)": 58.05, "MasakhaNEWSClusteringP2P (eng)": 43.8, - "MasakhaNEWSClusteringP2P (fra)": 58.28, "MasakhaNEWSClusteringP2P (hau)": 44.78, "MasakhaNEWSClusteringP2P (ibo)": 44.97, "MasakhaNEWSClusteringP2P (lin)": 48.08, @@ -13001,10 +14164,9 @@ "MasakhaNEWSClusteringP2P (tir)": 53.44, "MasakhaNEWSClusteringP2P (xho)": 40.32, "MasakhaNEWSClusteringP2P (yor)": 37.97, - "MasakhaNEWSClusteringS2S": 51.16, + "MasakhaNEWSClusteringS2S (fra)": 55.43, "MasakhaNEWSClusteringS2S (amh)": 49.38, "MasakhaNEWSClusteringS2S (eng)": 45.76, - "MasakhaNEWSClusteringS2S (fra)": 55.43, "MasakhaNEWSClusteringS2S (hau)": 16.11, "MasakhaNEWSClusteringS2S (ibo)": 24.38, "MasakhaNEWSClusteringS2S (lin)": 44.8, @@ -13039,10 +14201,10 @@ "CDSC-E": 72.67, "Cmnli": 74.51, "Ocnli": 59.63, - "OpusparcusPC": 92.72, + "OpusparcusPC (fr)": 92.72, "PPC": 88.01, "PSC": 99.14, - "PawsXPairClassification": 56.93, + "PawsXPairClassification (fr)": 56.93, "SICK-E-PL": 68.77 }, { @@ -13050,20 +14212,18 @@ "CDSC-E": 72.7, "Cmnli": 74.98, "Ocnli": 60.47, - "OpusparcusPC": 92.73, + "OpusparcusPC (fr)": 92.12, "OpusparcusPC (de)": 95.83, "OpusparcusPC (en)": 98.71, "OpusparcusPC (fi)": 90.3, - "OpusparcusPC (fr)": 92.12, "OpusparcusPC (ru)": 86.82, "OpusparcusPC (sv)": 93.05, "PPC": 88.01, "PSC": 99.14, - "PawsXPairClassification": 56.93, + "PawsXPairClassification (fr)": 56.07, "PawsXPairClassification (de)": 54.26, "PawsXPairClassification (en)": 55.79, "PawsXPairClassification (es)": 54.13, - "PawsXPairClassification (fr)": 56.07, "PawsXPairClassification (ja)": 49.15, "PawsXPairClassification (ko)": 51.01, "PawsXPairClassification (zh)": 55.13, @@ -13173,11 +14333,10 @@ "MMarcoRetrieval": 76.01, "MSMARCO-PL": 29.52, "MedicalRetrieval": 48.33, - "MintakaRetrieval": 23.46, + "MintakaRetrieval (fr)": 30.96, "MintakaRetrieval (ar)": 23.06, "MintakaRetrieval (de)": 29.8, "MintakaRetrieval (es)": 29.88, - "MintakaRetrieval (fr)": 30.96, "MintakaRetrieval (hi)": 22.68, "MintakaRetrieval (it)": 29.77, "MintakaRetrieval (ja)": 22.98, @@ -13212,7 +14371,7 @@ "Touche2020": 21.5, "VideoRetrieval": 61.26, "WinoGrande": 56.18, - "XPQARetrieval": 65.81, + "XPQARetrieval (fr)": 65.81, "XPQARetrieval (ara-ara)": 39.97, "XPQARetrieval (eng-ara)": 17.23, "XPQARetrieval (ara-eng)": 34.35, @@ -13265,9 +14424,10 @@ "QBQTC": 28.81, "SICK-R-PL": 71.23, "SICKFr": 76.23, - "STS22": 34.07, + "STS22 (zh)": 65.64, + "STS22 (pl)": 34.07, "STSB": 79.05, - "STSBenchmarkMultilingualSTS": 80.62 + "STSBenchmarkMultilingualSTS (fr)": 80.62 }, { "Model": "multilingual-e5-base", @@ -13562,11 +14722,10 @@ "AmazonCounterfactualClassification (de)": 68.66, "AmazonCounterfactualClassification (ja)": 78.8, "AmazonPolarityClassification": 93.26, - "AmazonReviewsClassification": 41.91, + "AmazonReviewsClassification (fr)": 42.55, "AmazonReviewsClassification (en)": 49.2, "AmazonReviewsClassification (de)": 46.5, "AmazonReviewsClassification (es)": 44.35, - "AmazonReviewsClassification (fr)": 42.55, "AmazonReviewsClassification (ja)": 41.71, "AmazonReviewsClassification (zh)": 38.87, "AngryTweetsClassification": 57.69, @@ -13583,24 +14742,21 @@ "JDReview": 80.54, "KinopoiskClassification": 56.59, "LccSentimentClassification": 61.53, - "MTOPDomainClassification": 86.41, + "MTOPDomainClassification (fr)": 86.23, "MTOPDomainClassification (en)": 91.81, "MTOPDomainClassification (de)": 90.44, "MTOPDomainClassification (es)": 88.34, - "MTOPDomainClassification (fr)": 86.23, "MTOPDomainClassification (hi)": 86.84, "MTOPDomainClassification (th)": 86.88, - "MTOPIntentClassification": 59.43, + "MTOPIntentClassification (fr)": 56.25, "MTOPIntentClassification (en)": 64.29, "MTOPIntentClassification (de)": 65.97, "MTOPIntentClassification (es)": 61.9, - "MTOPIntentClassification (fr)": 56.25, "MTOPIntentClassification (hi)": 59.17, "MTOPIntentClassification (th)": 62.59, - "MasakhaNEWSClassification": 79.38, + "MasakhaNEWSClassification (fra)": 76.11, "MasakhaNEWSClassification (amh)": 83.7, "MasakhaNEWSClassification (eng)": 78.26, - "MasakhaNEWSClassification (fra)": 76.11, "MasakhaNEWSClassification (hau)": 76.17, "MasakhaNEWSClassification (ibo)": 70.05, "MasakhaNEWSClassification (lin)": 75.89, @@ -13614,17 +14770,18 @@ "MasakhaNEWSClassification (tir)": 72.06, "MasakhaNEWSClassification (xho)": 82.56, "MasakhaNEWSClassification (yor)": 81.09, - "MassiveIntentClassification": 65.07, + "MassiveIntentClassification (da)": 63.7, + "MassiveIntentClassification (nb)": 64.54, + "MassiveIntentClassification (sv)": 66.52, + "MassiveIntentClassification (pl)": 65.09, "MassiveIntentClassification (he)": 62.44, "MassiveIntentClassification (id)": 63.51, "MassiveIntentClassification (fi)": 64.28, "MassiveIntentClassification (hu)": 64.0, - "MassiveIntentClassification (nb)": 64.54, "MassiveIntentClassification (vi)": 63.39, "MassiveIntentClassification (ko)": 63.92, "MassiveIntentClassification (ta)": 53.41, "MassiveIntentClassification (te)": 53.96, - "MassiveIntentClassification (da)": 63.7, "MassiveIntentClassification (ar)": 54.1, "MassiveIntentClassification (en)": 68.51, "MassiveIntentClassification (hi)": 60.93, @@ -13643,12 +14800,10 @@ "MassiveIntentClassification (nl)": 65.0, "MassiveIntentClassification (az)": 54.68, "MassiveIntentClassification (ru)": 65.76, - "MassiveIntentClassification (sv)": 66.52, "MassiveIntentClassification (ro)": 59.76, "MassiveIntentClassification (zh-TW)": 58.78, "MassiveIntentClassification (jv)": 48.96, "MassiveIntentClassification (fa)": 63.74, - "MassiveIntentClassification (pl)": 65.09, "MassiveIntentClassification (km)": 34.88, "MassiveIntentClassification (ja)": 67.11, "MassiveIntentClassification (kn)": 53.45, @@ -13666,14 +14821,16 @@ "MassiveIntentClassification (mn)": 49.6, "MassiveIntentClassification (hy)": 50.89, "MassiveIntentClassification (pt)": 65.6, - "MassiveScenarioClassification": 69.82, + "MassiveScenarioClassification (da)": 71.18, + "MassiveScenarioClassification (nb)": 70.44, + "MassiveScenarioClassification (sv)": 72.77, + "MassiveScenarioClassification (pl)": 69.83, "MassiveScenarioClassification (en)": 73.04, "MassiveScenarioClassification (ta)": 58.76, "MassiveScenarioClassification (ml)": 63.17, "MassiveScenarioClassification (pt)": 68.33, "MassiveScenarioClassification (he)": 67.72, "MassiveScenarioClassification (ar)": 61.0, - "MassiveScenarioClassification (pl)": 69.83, "MassiveScenarioClassification (vi)": 68.91, "MassiveScenarioClassification (ms)": 63.55, "MassiveScenarioClassification (sl)": 65.33, @@ -13691,11 +14848,9 @@ "MassiveScenarioClassification (ro)": 66.06, "MassiveScenarioClassification (zh-CN)": 72.25, "MassiveScenarioClassification (kn)": 59.36, - "MassiveScenarioClassification (nb)": 70.44, "MassiveScenarioClassification (de)": 71.25, "MassiveScenarioClassification (el)": 69.74, "MassiveScenarioClassification (es)": 69.07, - "MassiveScenarioClassification (da)": 71.18, "MassiveScenarioClassification (ur)": 60.89, "MassiveScenarioClassification (af)": 62.35, "MassiveScenarioClassification (ko)": 70.54, @@ -13703,7 +14858,6 @@ "MassiveScenarioClassification (ja)": 73.16, "MassiveScenarioClassification (az)": 58.49, "MassiveScenarioClassification (fa)": 67.55, - "MassiveScenarioClassification (sv)": 72.77, "MassiveScenarioClassification (zh-TW)": 64.35, "MassiveScenarioClassification (ka)": 47.82, "MassiveScenarioClassification (nl)": 71.11, @@ -13755,10 +14909,9 @@ "MLSUMClusteringP2P (ru)": 42.79, "MLSUMClusteringS2S": 37.65, "MLSUMClusteringS2S (ru)": 44.32, - "MasakhaNEWSClusteringP2P": 40.94, + "MasakhaNEWSClusteringP2P (fra)": 41.66, "MasakhaNEWSClusteringP2P (amh)": 67.16, "MasakhaNEWSClusteringP2P (eng)": 61.1, - "MasakhaNEWSClusteringP2P (fra)": 41.66, "MasakhaNEWSClusteringP2P (hau)": 60.7, "MasakhaNEWSClusteringP2P (ibo)": 48.41, "MasakhaNEWSClusteringP2P (lin)": 57.69, @@ -13772,10 +14925,9 @@ "MasakhaNEWSClusteringP2P (tir)": 54.21, "MasakhaNEWSClusteringP2P (xho)": 41.12, "MasakhaNEWSClusteringP2P (yor)": 36.22, - "MasakhaNEWSClusteringS2S": 30.56, + "MasakhaNEWSClusteringS2S (fra)": 39.84, "MasakhaNEWSClusteringS2S (amh)": 47.24, "MasakhaNEWSClusteringS2S (eng)": 53.93, - "MasakhaNEWSClusteringS2S (fra)": 39.84, "MasakhaNEWSClusteringS2S (hau)": 19.24, "MasakhaNEWSClusteringS2S (ibo)": 28.88, "MasakhaNEWSClusteringS2S (lin)": 42.22, @@ -13810,10 +14962,10 @@ "CDSC-E": 74.47, "Cmnli": 78.18, "Ocnli": 61.6, - "OpusparcusPC": 93.89, + "OpusparcusPC (fr)": 93.89, "PPC": 92.18, "PSC": 99.39, - "PawsXPairClassification": 58.5, + "PawsXPairClassification (fr)": 58.5, "SICK-E-PL": 75.96 }, { @@ -13821,20 +14973,18 @@ "CDSC-E": 74.47, "Cmnli": 78.18, "Ocnli": 61.6, - "OpusparcusPC": 93.89, + "OpusparcusPC (fr)": 93.68, "OpusparcusPC (de)": 97.27, "OpusparcusPC (en)": 98.74, "OpusparcusPC (fi)": 94.26, - "OpusparcusPC (fr)": 93.68, "OpusparcusPC (ru)": 89.64, "OpusparcusPC (sv)": 94.98, "PPC": 92.18, "PSC": 99.4, - "PawsXPairClassification": 58.61, + "PawsXPairClassification (fr)": 58.69, "PawsXPairClassification (de)": 57.14, "PawsXPairClassification (en)": 62.97, "PawsXPairClassification (es)": 56.87, - "PawsXPairClassification (fr)": 58.69, "PawsXPairClassification (ja)": 50.84, "PawsXPairClassification (ko)": 52.22, "PawsXPairClassification (zh)": 56.95, @@ -13944,11 +15094,10 @@ "MMarcoRetrieval": 79.2, "MSMARCO-PL": 33.38, "MedicalRetrieval": 51.44, - "MintakaRetrieval": 25.2, + "MintakaRetrieval (fr)": 34.24, "MintakaRetrieval (ar)": 26.5, "MintakaRetrieval (de)": 32.77, "MintakaRetrieval (es)": 34.23, - "MintakaRetrieval (fr)": 34.24, "MintakaRetrieval (hi)": 27.45, "MintakaRetrieval (it)": 33.84, "MintakaRetrieval (ja)": 26.45, @@ -13983,7 +15132,7 @@ "Touche2020": 23.13, "VideoRetrieval": 58.28, "WinoGrande": 54.99, - "XPQARetrieval": 66.15, + "XPQARetrieval (fr)": 66.15, "XPQARetrieval (ara-ara)": 43.69, "XPQARetrieval (eng-ara)": 30.86, "XPQARetrieval (ara-eng)": 39.11, @@ -14036,9 +15185,10 @@ "QBQTC": 29.77, "SICK-R-PL": 75.08, "SICKFr": 78.78, - "STS22": 34.66, + "STS22 (zh)": 65.64, + "STS22 (pl)": 34.66, "STSB": 81.08, - "STSBenchmarkMultilingualSTS": 82.53 + "STSBenchmarkMultilingualSTS (fr)": 82.53 }, { "Model": "multilingual-e5-large", @@ -15033,10 +16183,9 @@ "AmazonReviewsClassification (en)": 45.75, "AmazonReviewsClassification (de)": 41.07, "AmazonReviewsClassification (es)": 41.37, - "AmazonReviewsClassification (fr)": 39.47, + "AmazonReviewsClassification (fr)": 39.68, "AmazonReviewsClassification (ja)": 38.55, "AmazonReviewsClassification (zh)": 38.31, - "AmazonReviewsClassification": 39.68, "AngryTweetsClassification": 53.57, "Banking77Classification": 70.44, "CBD": 63.25, @@ -15054,20 +16203,18 @@ "MTOPDomainClassification (en)": 88.99, "MTOPDomainClassification (de)": 86.15, "MTOPDomainClassification (es)": 85.53, - "MTOPDomainClassification (fr)": 81.5, + "MTOPDomainClassification (fr)": 81.2, "MTOPDomainClassification (hi)": 84.07, "MTOPDomainClassification (th)": 83.16, - "MTOPDomainClassification": 81.2, "MTOPIntentClassification (en)": 56.69, "MTOPIntentClassification (de)": 55.88, "MTOPIntentClassification (es)": 53.15, - "MTOPIntentClassification (fr)": 44.35, + "MTOPIntentClassification (fr)": 46.01, "MTOPIntentClassification (hi)": 52.26, "MTOPIntentClassification (th)": 54.61, - "MTOPIntentClassification": 46.01, "MasakhaNEWSClassification (amh)": 84.28, "MasakhaNEWSClassification (eng)": 75.61, - "MasakhaNEWSClassification (fra)": 74.67, + "MasakhaNEWSClassification (fra)": 77.65, "MasakhaNEWSClassification (hau)": 73.08, "MasakhaNEWSClassification (ibo)": 63.9, "MasakhaNEWSClassification (lin)": 73.37, @@ -15081,7 +16228,6 @@ "MasakhaNEWSClassification (tir)": 68.01, "MasakhaNEWSClassification (xho)": 72.22, "MasakhaNEWSClassification (yor)": 73.84, - "MasakhaNEWSClassification": 77.65, "MassiveIntentClassification (is)": 41.53, "MassiveIntentClassification (tl)": 48.7, "MassiveIntentClassification (he)": 51.11, @@ -15091,7 +16237,7 @@ "MassiveIntentClassification (sl)": 47.71, "MassiveIntentClassification (af)": 48.74, "MassiveIntentClassification (de)": 55.52, - "MassiveIntentClassification (pl)": 57.33, + "MassiveIntentClassification (pl)": 57.4, "MassiveIntentClassification (en)": 63.87, "MassiveIntentClassification (fi)": 55.14, "MassiveIntentClassification (lv)": 44.93, @@ -15100,10 +16246,10 @@ "MassiveIntentClassification (mn)": 47.38, "MassiveIntentClassification (it)": 58.8, "MassiveIntentClassification (ko)": 57.12, - "MassiveIntentClassification (nb)": 55.36, + "MassiveIntentClassification (nb)": 53.96, "MassiveIntentClassification (es)": 59.19, "MassiveIntentClassification (ja)": 61.58, - "MassiveIntentClassification (da)": 56.12, + "MassiveIntentClassification (da)": 54.63, "MassiveIntentClassification (zh-TW)": 53.75, "MassiveIntentClassification (id)": 56.2, "MassiveIntentClassification (ka)": 39.52, @@ -15130,16 +16276,15 @@ "MassiveIntentClassification (sq)": 48.68, "MassiveIntentClassification (vi)": 56.19, "MassiveIntentClassification (jv)": 42.96, - "MassiveIntentClassification (sv)": 58.2, + "MassiveIntentClassification (sv)": 56.6, "MassiveIntentClassification (hy)": 47.89, "MassiveIntentClassification (el)": 54.14, - "MassiveIntentClassification": 57.4, "MassiveScenarioClassification (de)": 65.88, - "MassiveScenarioClassification (nb)": 61.96, + "MassiveScenarioClassification (nb)": 59.9, "MassiveScenarioClassification (th)": 65.72, "MassiveScenarioClassification (ka)": 44.96, "MassiveScenarioClassification (jv)": 51.39, - "MassiveScenarioClassification (sv)": 67.33, + "MassiveScenarioClassification (sv)": 65.54, "MassiveScenarioClassification (fr)": 63.9, "MassiveScenarioClassification (tl)": 55.3, "MassiveScenarioClassification (hu)": 61.93, @@ -15152,7 +16297,7 @@ "MassiveScenarioClassification (km)": 39.01, "MassiveScenarioClassification (el)": 62.29, "MassiveScenarioClassification (bn)": 57.38, - "MassiveScenarioClassification (da)": 64.03, + "MassiveScenarioClassification (da)": 62.34, "MassiveScenarioClassification (ml)": 60.31, "MassiveScenarioClassification (ro)": 60.0, "MassiveScenarioClassification (ru)": 63.89, @@ -15184,8 +16329,7 @@ "MassiveScenarioClassification (vi)": 62.67, "MassiveScenarioClassification (he)": 59.22, "MassiveScenarioClassification (ar)": 54.56, - "MassiveScenarioClassification (pl)": 64.27, - "MassiveScenarioClassification": 64.25, + "MassiveScenarioClassification (pl)": 64.25, "MultilingualSentiment": 64.74, "NoRecClassification": 53.96, "NordicLangClassification": 75.15, @@ -15225,7 +16369,7 @@ "MLSUMClusteringS2S": 37.61, "MasakhaNEWSClusteringP2P (amh)": 66.2, "MasakhaNEWSClusteringP2P (eng)": 50.08, - "MasakhaNEWSClusteringP2P (fra)": 56.32, + "MasakhaNEWSClusteringP2P (fra)": 40.12, "MasakhaNEWSClusteringP2P (hau)": 53.63, "MasakhaNEWSClusteringP2P (ibo)": 49.19, "MasakhaNEWSClusteringP2P (lin)": 55.06, @@ -15239,10 +16383,9 @@ "MasakhaNEWSClusteringP2P (tir)": 57.68, "MasakhaNEWSClusteringP2P (xho)": 39.96, "MasakhaNEWSClusteringP2P (yor)": 26.56, - "MasakhaNEWSClusteringP2P": 40.12, "MasakhaNEWSClusteringS2S (amh)": 55.48, "MasakhaNEWSClusteringS2S (eng)": 37.79, - "MasakhaNEWSClusteringS2S (fra)": 35.8, + "MasakhaNEWSClusteringS2S (fra)": 39.22, "MasakhaNEWSClusteringS2S (hau)": 20.22, "MasakhaNEWSClusteringS2S (ibo)": 35.67, "MasakhaNEWSClusteringS2S (lin)": 41.12, @@ -15256,7 +16399,6 @@ "MasakhaNEWSClusteringS2S (tir)": 50.51, "MasakhaNEWSClusteringS2S (xho)": 37.26, "MasakhaNEWSClusteringS2S (yor)": 23.38, - "MasakhaNEWSClusteringS2S": 39.22, "MedrxivClusteringP2P": 30.72, "MedrxivClusteringS2S": 27.0, "RedditClustering": 40.12, @@ -15304,20 +16446,18 @@ "OpusparcusPC (de)": 94.9, "OpusparcusPC (en)": 98.42, "OpusparcusPC (fi)": 88.29, - "OpusparcusPC (fr)": 91.77, + "OpusparcusPC (fr)": 92.52, "OpusparcusPC (ru)": 84.79, "OpusparcusPC (sv)": 91.07, - "OpusparcusPC": 92.52, "PPC": 86.79, "PSC": 99.24, "PawsXPairClassification (de)": 52.19, "PawsXPairClassification (en)": 54.01, "PawsXPairClassification (es)": 51.46, - "PawsXPairClassification (fr)": 52.77, + "PawsXPairClassification (fr)": 55.72, "PawsXPairClassification (ja)": 48.41, "PawsXPairClassification (ko)": 49.98, "PawsXPairClassification (zh)": 54.03, - "PawsXPairClassification": 55.72, "SICK-E-PL": 66.34, "SprintDuplicateQuestions": 92.42, "TERRa": 55.14, @@ -15329,10 +16469,10 @@ "CDSC-E": 69.7, "Cmnli": 72.12, "Ocnli": 60.77, - "OpusparcusPC": 92.52, + "OpusparcusPC (fr)": 92.52, "PPC": 86.72, "PSC": 99.24, - "PawsXPairClassification": 55.68, + "PawsXPairClassification (fr)": 55.68, "SICK-E-PL": 66.34 } ] @@ -15415,12 +16555,11 @@ "MintakaRetrieval (ar)": 21.22, "MintakaRetrieval (de)": 25.6, "MintakaRetrieval (es)": 26.4, - "MintakaRetrieval (fr)": 25.0, + "MintakaRetrieval (fr)": 22.53, "MintakaRetrieval (hi)": 21.1, "MintakaRetrieval (it)": 26.25, "MintakaRetrieval (ja)": 20.69, "MintakaRetrieval (pt)": 24.44, - "MintakaRetrieval": 22.53, "NFCorpus": 31.0, "NFCorpus-PL": 26.48, "NQ-PL": 40.46, @@ -15487,7 +16626,7 @@ "XPQARetrieval (cmn-cmn)": 63.98, "XPQARetrieval (eng-cmn)": 16.52, "XPQARetrieval (cmn-eng)": 45.32, - "XPQARetrieval": 57.47 + "XPQARetrieval (fr)": 57.47 } ] }, @@ -15626,9 +16765,9 @@ "QBQTC": 30.25, "SICK-R-PL": 69.46, "SICKFr": 75.62, - "STS22": 35.8, + "STS22 (pl)": 35.8, "STSB": 77.73, - "STSBenchmarkMultilingualSTS": 79.32 + "STSBenchmarkMultilingualSTS (fr)": 79.32 } ] }, @@ -15687,8 +16826,12 @@ "DKHateClassification": 61.73, "DanishPoliticalCommentsClassification": 34.84, "LccSentimentClassification": 51.4, - "MassiveIntentClassification": 53.89, - "MassiveScenarioClassification": 55.37, + "MassiveIntentClassification (da)": 56.69, + "MassiveIntentClassification (nb)": 60.67, + "MassiveIntentClassification (sv)": 53.89, + "MassiveScenarioClassification (da)": 61.93, + "MassiveScenarioClassification (nb)": 67.31, + "MassiveScenarioClassification (sv)": 55.37, "NoRecClassification": 51.32, "NordicLangClassification": 84.69, "NorwegianParliament": 57.41, @@ -15739,8 +16882,12 @@ "DKHateClassification": 62.13, "DanishPoliticalCommentsClassification": 35.04, "LccSentimentClassification": 56.27, - "MassiveIntentClassification": 55.02, - "MassiveScenarioClassification": 57.12, + "MassiveIntentClassification (da)": 57.03, + "MassiveIntentClassification (nb)": 62.68, + "MassiveIntentClassification (sv)": 55.02, + "MassiveScenarioClassification (da)": 60.43, + "MassiveScenarioClassification (nb)": 67.44, + "MassiveScenarioClassification (sv)": 57.12, "NoRecClassification": 55.46, "NordicLangClassification": 85.27, "NorwegianParliament": 62.58, @@ -15822,16 +16969,16 @@ "accuracy": [ { "Model": "nomic-embed-text-v1.5-128", - "AmazonCounterfactualClassification": 69.78, + "AmazonCounterfactualClassification (en)": 69.78, "AmazonPolarityClassification": 88.74, - "AmazonReviewsClassification": 43.11, + "AmazonReviewsClassification (en)": 43.11, "Banking77Classification": 82.78, "EmotionClassification": 42.92, "ImdbClassification": 80.87, - "MTOPDomainClassification": 89.61, - "MTOPIntentClassification": 68.9, - "MassiveIntentClassification": 69.34, - "MassiveScenarioClassification": 74.21, + "MTOPDomainClassification (en)": 89.61, + "MTOPIntentClassification (en)": 68.9, + "MassiveIntentClassification (en)": 69.34, + "MassiveScenarioClassification (en)": 74.21, "ToxicConversationsClassification": 68.16, "TweetSentimentExtractionClassification": 57.99 } @@ -15915,8 +17062,8 @@ "STS14": 80.5, "STS15": 85.84, "STS16": 83.9, - "STS17": 86.27, - "STS22": 64.24, + "STS17 (en-en)": 86.27, + "STS22 (en)": 64.24, "STSBenchmark": 84.28 } ] @@ -15944,16 +17091,16 @@ "accuracy": [ { "Model": "nomic-embed-text-v1.5-256", - "AmazonCounterfactualClassification": 72.94, + "AmazonCounterfactualClassification (en)": 72.94, "AmazonPolarityClassification": 91.35, - "AmazonReviewsClassification": 45.73, + "AmazonReviewsClassification (en)": 45.73, "Banking77Classification": 83.69, "EmotionClassification": 45.88, "ImdbClassification": 83.99, - "MTOPDomainClassification": 91.68, - "MTOPIntentClassification": 72.47, - "MassiveIntentClassification": 71.76, - "MassiveScenarioClassification": 75.67, + "MTOPDomainClassification (en)": 91.68, + "MTOPIntentClassification (en)": 72.47, + "MassiveIntentClassification (en)": 71.76, + "MassiveScenarioClassification (en)": 75.67, "ToxicConversationsClassification": 70.87, "TweetSentimentExtractionClassification": 59.2 } @@ -16037,8 +17184,8 @@ "STS14": 81.25, "STS15": 86.51, "STS16": 84.24, - "STS17": 86.44, - "STS22": 65.14, + "STS17 (en-en)": 86.44, + "STS22 (en)": 65.14, "STSBenchmark": 84.8 } ] @@ -16066,16 +17213,16 @@ "accuracy": [ { "Model": "nomic-embed-text-v1.5-512", - "AmazonCounterfactualClassification": 74.27, + "AmazonCounterfactualClassification (en)": 74.27, "AmazonPolarityClassification": 91.89, - "AmazonReviewsClassification": 46.97, + "AmazonReviewsClassification (en)": 46.97, "Banking77Classification": 84.15, "EmotionClassification": 47.73, "ImdbClassification": 85.47, - "MTOPDomainClassification": 92.62, - "MTOPIntentClassification": 74.27, - "MassiveIntentClassification": 73.07, - "MassiveScenarioClassification": 76.82, + "MTOPDomainClassification (en)": 92.62, + "MTOPIntentClassification (en)": 74.27, + "MassiveIntentClassification (en)": 73.07, + "MassiveScenarioClassification (en)": 76.82, "ToxicConversationsClassification": 71.25, "TweetSentimentExtractionClassification": 60.4 } @@ -16159,8 +17306,8 @@ "STS14": 81.38, "STS15": 86.79, "STS16": 84.56, - "STS17": 87.25, - "STS22": 65.24, + "STS17 (en-en)": 87.25, + "STS22 (en)": 65.24, "STSBenchmark": 85.14 } ] @@ -16188,16 +17335,16 @@ "accuracy": [ { "Model": "nomic-embed-text-v1.5-64", - "AmazonCounterfactualClassification": 66.85, + "AmazonCounterfactualClassification (en)": 66.85, "AmazonPolarityClassification": 85.92, - "AmazonReviewsClassification": 41.02, + "AmazonReviewsClassification (en)": 41.02, "Banking77Classification": 80.63, "EmotionClassification": 40.55, "ImdbClassification": 76.6, - "MTOPDomainClassification": 86.31, - "MTOPIntentClassification": 62.77, - "MassiveIntentClassification": 64.95, - "MassiveScenarioClassification": 70.38, + "MTOPDomainClassification (en)": 86.31, + "MTOPIntentClassification (en)": 62.77, + "MassiveIntentClassification (en)": 64.95, + "MassiveScenarioClassification (en)": 70.38, "ToxicConversationsClassification": 66.53, "TweetSentimentExtractionClassification": 55.23 } @@ -16281,8 +17428,8 @@ "STS14": 79.37, "STS15": 84.69, "STS16": 83.36, - "STS17": 85.73, - "STS22": 63.83, + "STS17 (en-en)": 85.73, + "STS22 (en)": 63.83, "STSBenchmark": 83.46 } ] @@ -16319,8 +17466,12 @@ "DKHateClassification": 58.78, "DanishPoliticalCommentsClassification": 34.14, "LccSentimentClassification": 54.07, - "MassiveIntentClassification": 52.08, - "MassiveScenarioClassification": 53.53, + "MassiveIntentClassification (da)": 53.16, + "MassiveIntentClassification (nb)": 54.2, + "MassiveIntentClassification (sv)": 52.08, + "MassiveScenarioClassification (da)": 57.17, + "MassiveScenarioClassification (nb)": 60.69, + "MassiveScenarioClassification (sv)": 53.53, "NoRecClassification": 53.4, "NordicLangClassification": 82.67, "NorwegianParliament": 59.33, @@ -16371,8 +17522,12 @@ "DKHateClassification": 62.71, "DanishPoliticalCommentsClassification": 33.53, "LccSentimentClassification": 46.93, - "MassiveIntentClassification": 48.47, - "MassiveScenarioClassification": 50.6, + "MassiveIntentClassification (da)": 45.98, + "MassiveIntentClassification (nb)": 47.42, + "MassiveIntentClassification (sv)": 48.47, + "MassiveScenarioClassification (da)": 50.51, + "MassiveScenarioClassification (nb)": 54.25, + "MassiveScenarioClassification (sv)": 50.6, "NoRecClassification": 50.46, "NordicLangClassification": 84.25, "NorwegianParliament": 58.85, @@ -16411,121 +17566,123 @@ "f1": [ { "Model": "paraphrase-multilingual-MiniLM-L12-v2", - "BUCC": 95.63, + "BUCC (de-en)": 97.11, + "BUCC (fr-en)": 94.99, + "BUCC (ru-en)": 95.06, + "BUCC (zh-en)": 95.63, "BornholmBitextMining": 19.67, - "Tatoeba": 95.31, - "Tatoeba (est-eng)": 97.33, - "Tatoeba (aze-eng)": 62.1, - "Tatoeba (oci-eng)": 38.57, - "Tatoeba (eus-eng)": 23.18, - "Tatoeba (sqi-eng)": 98.17, - "Tatoeba (yue-eng)": 71.45, + "Tatoeba (afr-eng)": 58.22, + "Tatoeba (amh-eng)": 36.21, + "Tatoeba (ang-eng)": 10.24, "Tatoeba (ara-eng)": 87.93, - "Tatoeba (wuu-eng)": 76.0, - "Tatoeba (lvs-eng)": 97.87, - "Tatoeba (ron-eng)": 95.3, - "Tatoeba (uzb-eng)": 17.14, - "Tatoeba (ell-eng)": 95.43, - "Tatoeba (lit-eng)": 93.16, - "Tatoeba (isl-eng)": 24.07, + "Tatoeba (arq-eng)": 18.6, + "Tatoeba (arz-eng)": 51.26, + "Tatoeba (ast-eng)": 62.17, "Tatoeba (awa-eng)": 33.43, - "Tatoeba (tuk-eng)": 15.16, - "Tatoeba (nld-eng)": 94.58, - "Tatoeba (ido-eng)": 40.25, - "Tatoeba (afr-eng)": 58.22, - "Tatoeba (cmn-eng)": 94.93, - "Tatoeba (max-eng)": 45.25, - "Tatoeba (tgl-eng)": 13.09, - "Tatoeba (ind-eng)": 92.74, - "Tatoeba (mkd-eng)": 91.0, + "Tatoeba (aze-eng)": 62.1, "Tatoeba (bel-eng)": 67.73, - "Tatoeba (hrv-eng)": 95.98, + "Tatoeba (ben-eng)": 36.48, + "Tatoeba (ber-eng)": 4.43, + "Tatoeba (bos-eng)": 93.27, "Tatoeba (bre-eng)": 5.56, "Tatoeba (bul-eng)": 92.65, - "Tatoeba (hun-eng)": 91.58, - "Tatoeba (srp-eng)": 92.24, - "Tatoeba (pol-eng)": 94.28, - "Tatoeba (mhr-eng)": 6.89, - "Tatoeba (fra-eng)": 91.72, - "Tatoeba (lfn-eng)": 47.02, - "Tatoeba (cha-eng)": 15.98, - "Tatoeba (hsb-eng)": 36.1, - "Tatoeba (rus-eng)": 91.87, - "Tatoeba (khm-eng)": 32.11, - "Tatoeba (arq-eng)": 18.6, - "Tatoeba (gla-eng)": 3.61, + "Tatoeba (cat-eng)": 94.42, + "Tatoeba (cbk-eng)": 55.37, + "Tatoeba (ceb-eng)": 8.05, "Tatoeba (ces-eng)": 95.12, - "Tatoeba (ita-eng)": 93.05, + "Tatoeba (cha-eng)": 15.98, + "Tatoeba (cmn-eng)": 94.93, + "Tatoeba (cor-eng)": 3.42, "Tatoeba (csb-eng)": 21.56, - "Tatoeba (kor-eng)": 92.52, - "Tatoeba (ile-eng)": 57.71, - "Tatoeba (xho-eng)": 4.52, - "Tatoeba (deu-eng)": 97.02, - "Tatoeba (heb-eng)": 86.88, - "Tatoeba (yid-eng)": 14.38, "Tatoeba (cym-eng)": 13.25, - "Tatoeba (jpn-eng)": 90.41, - "Tatoeba (tzl-eng)": 25.46, - "Tatoeba (ast-eng)": 62.17, - "Tatoeba (war-eng)": 7.25, - "Tatoeba (tha-eng)": 96.72, - "Tatoeba (fry-eng)": 31.13, + "Tatoeba (dan-eng)": 94.8, + "Tatoeba (deu-eng)": 97.02, + "Tatoeba (dsb-eng)": 33.43, "Tatoeba (dtp-eng)": 5.69, + "Tatoeba (ell-eng)": 95.43, + "Tatoeba (epo-eng)": 41.73, + "Tatoeba (est-eng)": 97.33, + "Tatoeba (eus-eng)": 23.18, + "Tatoeba (fao-eng)": 27.51, + "Tatoeba (fin-eng)": 93.1, + "Tatoeba (fra-eng)": 91.72, + "Tatoeba (fry-eng)": 31.13, + "Tatoeba (gla-eng)": 3.61, + "Tatoeba (gle-eng)": 11.62, + "Tatoeba (glg-eng)": 94.0, + "Tatoeba (gsw-eng)": 25.74, + "Tatoeba (heb-eng)": 86.88, + "Tatoeba (hin-eng)": 97.62, + "Tatoeba (hrv-eng)": 95.98, + "Tatoeba (hsb-eng)": 36.1, + "Tatoeba (hun-eng)": 91.58, "Tatoeba (hye-eng)": 93.28, + "Tatoeba (ido-eng)": 40.25, + "Tatoeba (ile-eng)": 57.71, + "Tatoeba (ina-eng)": 79.13, + "Tatoeba (ind-eng)": 92.74, + "Tatoeba (isl-eng)": 24.07, + "Tatoeba (ita-eng)": 93.05, + "Tatoeba (jav-eng)": 17.04, + "Tatoeba (jpn-eng)": 90.41, + "Tatoeba (kab-eng)": 1.16, + "Tatoeba (kat-eng)": 95.44, + "Tatoeba (kaz-eng)": 34.89, + "Tatoeba (khm-eng)": 32.11, + "Tatoeba (kor-eng)": 92.52, + "Tatoeba (kur-eng)": 46.94, + "Tatoeba (kzj-eng)": 6.24, + "Tatoeba (lat-eng)": 19.47, + "Tatoeba (lfn-eng)": 47.02, + "Tatoeba (lit-eng)": 93.16, + "Tatoeba (lvs-eng)": 97.87, + "Tatoeba (mal-eng)": 32.2, "Tatoeba (mar-eng)": 92.38, - "Tatoeba (cbk-eng)": 55.37, - "Tatoeba (uig-eng)": 24.39, - "Tatoeba (cor-eng)": 3.42, + "Tatoeba (max-eng)": 45.25, + "Tatoeba (mhr-eng)": 6.89, + "Tatoeba (mkd-eng)": 91.0, + "Tatoeba (mon-eng)": 95.04, + "Tatoeba (nds-eng)": 32.16, + "Tatoeba (nld-eng)": 94.58, + "Tatoeba (nno-eng)": 76.34, + "Tatoeba (nob-eng)": 97.73, "Tatoeba (nov-eng)": 47.99, + "Tatoeba (oci-eng)": 38.57, + "Tatoeba (orv-eng)": 15.1, + "Tatoeba (pam-eng)": 5.41, + "Tatoeba (pes-eng)": 92.59, + "Tatoeba (pms-eng)": 30.7, + "Tatoeba (pol-eng)": 94.28, + "Tatoeba (por-eng)": 92.13, + "Tatoeba (ron-eng)": 95.3, + "Tatoeba (rus-eng)": 91.87, + "Tatoeba (slk-eng)": 95.15, "Tatoeba (slv-eng)": 96.92, - "Tatoeba (kzj-eng)": 6.24, - "Tatoeba (dan-eng)": 94.8, - "Tatoeba (hin-eng)": 97.62, - "Tatoeba (ukr-eng)": 92.82, - "Tatoeba (jav-eng)": 17.04, + "Tatoeba (spa-eng)": 95.42, + "Tatoeba (sqi-eng)": 98.17, + "Tatoeba (srp-eng)": 92.24, "Tatoeba (swe-eng)": 94.42, "Tatoeba (swg-eng)": 26.31, - "Tatoeba (cat-eng)": 94.42, - "Tatoeba (ina-eng)": 79.13, - "Tatoeba (mal-eng)": 32.2, - "Tatoeba (gle-eng)": 11.62, - "Tatoeba (tel-eng)": 36.4, - "Tatoeba (zsm-eng)": 95.31, "Tatoeba (swh-eng)": 14.48, "Tatoeba (tam-eng)": 24.64, - "Tatoeba (epo-eng)": 41.73, - "Tatoeba (pms-eng)": 30.7, - "Tatoeba (mon-eng)": 95.04, - "Tatoeba (kat-eng)": 95.44, + "Tatoeba (tat-eng)": 10.25, + "Tatoeba (tel-eng)": 36.4, + "Tatoeba (tgl-eng)": 13.09, + "Tatoeba (tha-eng)": 96.72, + "Tatoeba (tuk-eng)": 15.16, + "Tatoeba (tur-eng)": 95.08, + "Tatoeba (tzl-eng)": 25.46, + "Tatoeba (uig-eng)": 24.39, + "Tatoeba (ukr-eng)": 92.82, "Tatoeba (urd-eng)": 94.57, + "Tatoeba (uzb-eng)": 17.14, "Tatoeba (vie-eng)": 95.12, - "Tatoeba (fin-eng)": 93.1, - "Tatoeba (tur-eng)": 95.08, - "Tatoeba (arz-eng)": 51.26, - "Tatoeba (fao-eng)": 27.51, - "Tatoeba (pes-eng)": 92.59, - "Tatoeba (tat-eng)": 10.25, - "Tatoeba (nds-eng)": 32.16, - "Tatoeba (nno-eng)": 76.34, - "Tatoeba (ber-eng)": 4.43, - "Tatoeba (bos-eng)": 93.27, - "Tatoeba (slk-eng)": 95.15, - "Tatoeba (spa-eng)": 95.42, - "Tatoeba (pam-eng)": 5.41, - "Tatoeba (ben-eng)": 36.48, - "Tatoeba (ang-eng)": 10.24, - "Tatoeba (kur-eng)": 46.94, - "Tatoeba (por-eng)": 92.13, - "Tatoeba (orv-eng)": 15.1, - "Tatoeba (dsb-eng)": 33.43, - "Tatoeba (amh-eng)": 36.21, - "Tatoeba (kab-eng)": 1.16, - "Tatoeba (kaz-eng)": 34.89, - "Tatoeba (nob-eng)": 97.73, - "Tatoeba (ceb-eng)": 8.05, - "Tatoeba (lat-eng)": 19.47, - "Tatoeba (glg-eng)": 94.0, - "Tatoeba (gsw-eng)": 25.74 + "Tatoeba (war-eng)": 7.25, + "Tatoeba (wuu-eng)": 76.0, + "Tatoeba (xho-eng)": 4.52, + "Tatoeba (yid-eng)": 14.38, + "Tatoeba (yue-eng)": 71.45, + "Tatoeba (zsm-eng)": 95.31 } ] }, @@ -16534,15 +17691,13 @@ { "Model": "paraphrase-multilingual-MiniLM-L12-v2", "AllegroReviews": 30.85, - "AmazonCounterfactualClassification": 63.45, - "AmazonCounterfactualClassification (en-ext)": 70.01, - "AmazonCounterfactualClassification (en)": 71.55, "AmazonCounterfactualClassification (de)": 68.36, + "AmazonCounterfactualClassification (en)": 71.55, + "AmazonCounterfactualClassification (en-ext)": 70.01, "AmazonCounterfactualClassification (ja)": 63.37, "AmazonPolarityClassification": 69.21, - "AmazonReviewsClassification": 35.26, - "AmazonReviewsClassification (en)": 35.12, "AmazonReviewsClassification (de)": 35.91, + "AmazonReviewsClassification (en)": 35.12, "AmazonReviewsClassification (es)": 37.49, "AmazonReviewsClassification (fr)": 35.29, "AmazonReviewsClassification (ja)": 33.21, @@ -16560,24 +17715,21 @@ "JDReview": 70.26, "KinopoiskClassification": 41.45, "LccSentimentClassification": 54.53, - "MTOPDomainClassification": 79.99, - "MTOPDomainClassification (en)": 87.03, "MTOPDomainClassification (de)": 79.21, + "MTOPDomainClassification (en)": 87.03, "MTOPDomainClassification (es)": 83.06, "MTOPDomainClassification (fr)": 78.64, "MTOPDomainClassification (hi)": 81.36, "MTOPDomainClassification (th)": 79.97, - "MTOPIntentClassification": 61.96, - "MTOPIntentClassification (en)": 65.5, "MTOPIntentClassification (de)": 54.21, + "MTOPIntentClassification (en)": 65.5, "MTOPIntentClassification (es)": 60.3, "MTOPIntentClassification (fr)": 54.01, "MTOPIntentClassification (hi)": 59.92, "MTOPIntentClassification (th)": 61.97, - "MasakhaNEWSClassification": 76.09, + "MasakhaNEWSClassification (fra)": 71.68, "MasakhaNEWSClassification (amh)": 64.28, "MasakhaNEWSClassification (eng)": 74.7, - "MasakhaNEWSClassification (fra)": 71.68, "MasakhaNEWSClassification (hau)": 47.96, "MasakhaNEWSClassification (ibo)": 42.46, "MasakhaNEWSClassification (lin)": 59.26, @@ -16591,7 +17743,8 @@ "MasakhaNEWSClassification (tir)": 27.94, "MasakhaNEWSClassification (xho)": 44.81, "MasakhaNEWSClassification (yor)": 52.92, - "MassiveIntentClassification": 57.52, + "MassiveIntentClassification (pl)": 59.48, + "MassiveIntentClassification (fr)": 60.24, "MassiveIntentClassification (de)": 50.71, "MassiveIntentClassification (he)": 52.55, "MassiveIntentClassification (th)": 58.92, @@ -16620,11 +17773,9 @@ "MassiveIntentClassification (mn)": 51.77, "MassiveIntentClassification (sl)": 57.35, "MassiveIntentClassification (tr)": 59.91, - "MassiveIntentClassification (fr)": 60.24, "MassiveIntentClassification (id)": 59.9, "MassiveIntentClassification (my)": 52.03, "MassiveIntentClassification (zh-CN)": 62.0, - "MassiveIntentClassification (pl)": 59.48, "MassiveIntentClassification (es)": 59.7, "MassiveIntentClassification (ja)": 60.9, "MassiveIntentClassification (ta)": 36.82, @@ -16643,7 +17794,8 @@ "MassiveIntentClassification (fa)": 61.03, "MassiveIntentClassification (da)": 57.75, "MassiveIntentClassification (sv)": 59.43, - "MassiveScenarioClassification": 64.52, + "MassiveScenarioClassification (pl)": 65.04, + "MassiveScenarioClassification (fr)": 66.09, "MassiveScenarioClassification (zh-CN)": 67.45, "MassiveScenarioClassification (bn)": 41.17, "MassiveScenarioClassification (sw)": 34.86, @@ -16689,11 +17841,9 @@ "MassiveScenarioClassification (ms)": 61.73, "MassiveScenarioClassification (pt)": 65.83, "MassiveScenarioClassification (ur)": 60.41, - "MassiveScenarioClassification (pl)": 65.04, "MassiveScenarioClassification (nb)": 64.25, "MassiveScenarioClassification (hi)": 65.23, "MassiveScenarioClassification (te)": 46.49, - "MassiveScenarioClassification (fr)": 66.09, "MassiveScenarioClassification (ml)": 47.73, "MultilingualSentiment": 61.9, "NoRecClassification": 46.7, @@ -16731,10 +17881,9 @@ "MLSUMClusteringP2P (ru)": 37.0, "MLSUMClusteringS2S": 36.55, "MLSUMClusteringS2S (ru)": 38.16, - "MasakhaNEWSClusteringP2P": 36.58, + "MasakhaNEWSClusteringP2P (fra)": 40.85, "MasakhaNEWSClusteringP2P (amh)": 40.36, "MasakhaNEWSClusteringP2P (eng)": 49.96, - "MasakhaNEWSClusteringP2P (fra)": 40.85, "MasakhaNEWSClusteringP2P (hau)": 19.39, "MasakhaNEWSClusteringP2P (ibo)": 33.81, "MasakhaNEWSClusteringP2P (lin)": 51.98, @@ -16748,10 +17897,9 @@ "MasakhaNEWSClusteringP2P (tir)": 42.02, "MasakhaNEWSClusteringP2P (xho)": 27.68, "MasakhaNEWSClusteringP2P (yor)": 27.29, - "MasakhaNEWSClusteringS2S": 33.9, + "MasakhaNEWSClusteringS2S (fra)": 36.5, "MasakhaNEWSClusteringS2S (amh)": 42.28, "MasakhaNEWSClusteringS2S (eng)": 25.74, - "MasakhaNEWSClusteringS2S (fra)": 36.5, "MasakhaNEWSClusteringS2S (hau)": 9.2, "MasakhaNEWSClusteringS2S (ibo)": 33.37, "MasakhaNEWSClusteringS2S (lin)": 47.76, @@ -16784,10 +17932,10 @@ { "Model": "paraphrase-multilingual-MiniLM-L12-v2", "CDSC-E": 72.22, - "OpusparcusPC": 92.01, + "OpusparcusPC (fr)": 92.01, "PPC": 91.8, "PSC": 97.14, - "PawsXPairClassification": 56.94, + "PawsXPairClassification (fr)": 56.94, "SICK-E-PL": 71.94, "SprintDuplicateQuestions": 89.46, "TwitterSemEval2015": 62.06, @@ -16796,20 +17944,18 @@ { "Model": "paraphrase-multilingual-MiniLM-L12-v2", "CDSC-E": 72.32, - "OpusparcusPC": 92.01, + "OpusparcusPC (fr)": 92.01, "OpusparcusPC (de)": 96.63, "OpusparcusPC (en)": 98.59, "OpusparcusPC (fi)": 93.2, - "OpusparcusPC (fr)": 92.01, "OpusparcusPC (ru)": 88.25, "OpusparcusPC (sv)": 93.99, "PPC": 92.36, "PSC": 97.14, - "PawsXPairClassification": 57.03, + "PawsXPairClassification (fr)": 57.13, "PawsXPairClassification (de)": 53.34, "PawsXPairClassification (en)": 55.94, "PawsXPairClassification (es)": 54.61, - "PawsXPairClassification (fr)": 57.13, "PawsXPairClassification (ja)": 48.84, "PawsXPairClassification (ko)": 49.86, "PawsXPairClassification (zh)": 54.59, @@ -16918,11 +18064,10 @@ "MSMARCO": 23.72, "MSMARCO-PL": 10.39, "MedicalRetrieval": 15.46, - "MintakaRetrieval": 21.53, + "MintakaRetrieval (fr)": 21.53, "MintakaRetrieval (ar)": 12.61, "MintakaRetrieval (de)": 21.77, "MintakaRetrieval (es)": 21.59, - "MintakaRetrieval (fr)": 21.53, "MintakaRetrieval (hi)": 16.76, "MintakaRetrieval (it)": 22.23, "MintakaRetrieval (ja)": 14.33, @@ -16959,7 +18104,7 @@ "Touche2020": 16.06, "VideoRetrieval": 14.71, "WinoGrande": 46.52, - "XPQARetrieval": 42.51, + "XPQARetrieval (fr)": 42.51, "XPQARetrieval (ara-ara)": 22.97, "XPQARetrieval (eng-ara)": 17.17, "XPQARetrieval (ara-eng)": 25.5, @@ -17074,10 +18219,21 @@ "STS14": 78.85, "STS15": 85.84, "STS16": 81.05, - "STS17": 81.71, - "STS22": 70.55, + "STS17 (ar-ar)": 79.16, + "STS17 (en-ar)": 81.22, + "STS17 (en-de)": 84.22, + "STS17 (en-en)": 86.87, + "STS17 (en-tr)": 76.74, + "STS17 (es-en)": 84.44, + "STS17 (es-es)": 85.56, + "STS17 (fr-en)": 76.59, + "STS17 (it-en)": 82.35, + "STS17 (ko-ko)": 77.03, + "STS17 (nl-en)": 81.71, + "STS22 (pl)": 33.73, + "STS22 (fr)": 70.55, "STSBenchmark": 84.42, - "STSBenchmarkMultilingualSTS": 79.9 + "STSBenchmarkMultilingualSTS (fr)": 79.9 } ] }, @@ -17125,7 +18281,10 @@ "f1": [ { "Model": "paraphrase-multilingual-mpnet-base-v2", - "BUCC": 97.56, + "BUCC (de-en)": 98.59, + "BUCC (fr-en)": 96.89, + "BUCC (ru-en)": 96.44, + "BUCC (zh-en)": 97.56, "BornholmBitextMining": 18.18, "Tatoeba (rus-eng)": 92.92, "Tatoeba (slv-eng)": 97.08, @@ -17238,8 +18397,7 @@ "Tatoeba (cat-eng)": 96.05, "Tatoeba (zsm-eng)": 95.8, "Tatoeba (ces-eng)": 95.73, - "Tatoeba (eus-eng)": 31.33, - "Tatoeba": 95.8 + "Tatoeba (eus-eng)": 31.33 } ] }, @@ -17248,19 +18406,17 @@ { "Model": "paraphrase-multilingual-mpnet-base-v2", "AllegroReviews": 33.86, - "AmazonCounterfactualClassification (en-ext)": 76.25, + "AmazonCounterfactualClassification (en-ext)": 76.23, "AmazonCounterfactualClassification (en)": 75.81, - "AmazonCounterfactualClassification (de)": 69.96, - "AmazonCounterfactualClassification (ja)": 69.78, - "AmazonCounterfactualClassification": 69.79, + "AmazonCounterfactualClassification (de)": 69.95, + "AmazonCounterfactualClassification (ja)": 69.79, "AmazonPolarityClassification": 76.41, - "AmazonReviewsClassification (en)": 38.52, - "AmazonReviewsClassification (de)": 39.53, - "AmazonReviewsClassification (es)": 39.97, - "AmazonReviewsClassification (fr)": 38.98, - "AmazonReviewsClassification (ja)": 36.65, + "AmazonReviewsClassification (en)": 38.51, + "AmazonReviewsClassification (de)": 39.52, + "AmazonReviewsClassification (es)": 39.99, + "AmazonReviewsClassification (fr)": 39.0, + "AmazonReviewsClassification (ja)": 36.64, "AmazonReviewsClassification (zh)": 37.74, - "AmazonReviewsClassification": 37.74, "AngryTweetsClassification": 54.84, "Banking77Classification": 81.07, "CBD": 65.0, @@ -17276,21 +18432,19 @@ "LccSentimentClassification": 58.4, "MTOPDomainClassification (en)": 89.24, "MTOPDomainClassification (de)": 85.73, - "MTOPDomainClassification (es)": 86.98, + "MTOPDomainClassification (es)": 86.96, "MTOPDomainClassification (fr)": 81.21, "MTOPDomainClassification (hi)": 84.76, "MTOPDomainClassification (th)": 82.51, - "MTOPDomainClassification": 82.51, "MTOPIntentClassification (en)": 68.69, - "MTOPIntentClassification (de)": 61.26, - "MTOPIntentClassification (es)": 66.6, - "MTOPIntentClassification (fr)": 59.75, - "MTOPIntentClassification (hi)": 62.38, - "MTOPIntentClassification (th)": 64.77, - "MTOPIntentClassification": 64.8, + "MTOPIntentClassification (de)": 61.27, + "MTOPIntentClassification (es)": 66.59, + "MTOPIntentClassification (fr)": 59.76, + "MTOPIntentClassification (hi)": 62.37, + "MTOPIntentClassification (th)": 64.8, "MasakhaNEWSClassification (amh)": 78.83, "MasakhaNEWSClassification (eng)": 75.39, - "MasakhaNEWSClassification (fra)": 72.94, + "MasakhaNEWSClassification (fra)": 78.1, "MasakhaNEWSClassification (hau)": 54.49, "MasakhaNEWSClassification (ibo)": 46.79, "MasakhaNEWSClassification (lin)": 69.77, @@ -17304,7 +18458,6 @@ "MasakhaNEWSClassification (tir)": 45.04, "MasakhaNEWSClassification (xho)": 48.82, "MasakhaNEWSClassification (yor)": 58.3, - "MasakhaNEWSClassification": 78.1, "MassiveIntentClassification (km)": 45.48, "MassiveIntentClassification (sv)": 64.71, "MassiveIntentClassification (mn)": 56.61, @@ -17312,7 +18465,7 @@ "MassiveIntentClassification (zh-TW)": 62.33, "MassiveIntentClassification (pt)": 64.88, "MassiveIntentClassification (nl)": 63.57, - "MassiveIntentClassification (fr)": 64.8, + "MassiveIntentClassification (fr)": 61.88, "MassiveIntentClassification (is)": 37.09, "MassiveIntentClassification (hu)": 63.85, "MassiveIntentClassification (az)": 56.98, @@ -17350,13 +18503,12 @@ "MassiveIntentClassification (it)": 64.69, "MassiveIntentClassification (ml)": 54.34, "MassiveIntentClassification (ms)": 60.72, - "MassiveIntentClassification (pl)": 64.32, + "MassiveIntentClassification (pl)": 64.29, "MassiveIntentClassification (nb)": 62.62, "MassiveIntentClassification (ru)": 63.23, "MassiveIntentClassification (am)": 41.56, "MassiveIntentClassification (tl)": 38.83, "MassiveIntentClassification (el)": 62.63, - "MassiveIntentClassification": 61.88, "MassiveScenarioClassification (jv)": 44.22, "MassiveScenarioClassification (kn)": 56.08, "MassiveScenarioClassification (ar)": 57.79, @@ -17366,7 +18518,7 @@ "MassiveScenarioClassification (pt)": 70.08, "MassiveScenarioClassification (ru)": 69.92, "MassiveScenarioClassification (zh-TW)": 68.71, - "MassiveScenarioClassification (fr)": 70.71, + "MassiveScenarioClassification (fr)": 67.9, "MassiveScenarioClassification (ms)": 65.85, "MassiveScenarioClassification (az)": 61.52, "MassiveScenarioClassification (ka)": 57.3, @@ -17397,7 +18549,7 @@ "MassiveScenarioClassification (te)": 58.79, "MassiveScenarioClassification (en)": 75.35, "MassiveScenarioClassification (sv)": 71.6, - "MassiveScenarioClassification (pl)": 68.99, + "MassiveScenarioClassification (pl)": 68.98, "MassiveScenarioClassification (ro)": 67.94, "MassiveScenarioClassification (hy)": 63.03, "MassiveScenarioClassification (am)": 48.96, @@ -17408,7 +18560,6 @@ "MassiveScenarioClassification (my)": 63.03, "MassiveScenarioClassification (km)": 53.13, "MassiveScenarioClassification (tl)": 43.98, - "MassiveScenarioClassification": 67.9, "MultilingualSentiment": 66.49, "NoRecClassification": 50.32, "NordicLangClassification": 41.57, @@ -17447,7 +18598,7 @@ "MLSUMClusteringS2S": 37.53, "MasakhaNEWSClusteringP2P (amh)": 46.85, "MasakhaNEWSClusteringP2P (eng)": 47.3, - "MasakhaNEWSClusteringP2P (fra)": 53.3, + "MasakhaNEWSClusteringP2P (fra)": 41.57, "MasakhaNEWSClusteringP2P (hau)": 27.61, "MasakhaNEWSClusteringP2P (ibo)": 41.32, "MasakhaNEWSClusteringP2P (lin)": 58.37, @@ -17461,10 +18612,9 @@ "MasakhaNEWSClusteringP2P (tir)": 48.33, "MasakhaNEWSClusteringP2P (xho)": 29.47, "MasakhaNEWSClusteringP2P (yor)": 28.25, - "MasakhaNEWSClusteringP2P": 41.57, "MasakhaNEWSClusteringS2S (amh)": 51.54, "MasakhaNEWSClusteringS2S (eng)": 43.28, - "MasakhaNEWSClusteringS2S (fra)": 37.92, + "MasakhaNEWSClusteringS2S (fra)": 30.88, "MasakhaNEWSClusteringS2S (hau)": 17.97, "MasakhaNEWSClusteringS2S (ibo)": 34.56, "MasakhaNEWSClusteringS2S (lin)": 57.43, @@ -17478,7 +18628,6 @@ "MasakhaNEWSClusteringS2S (tir)": 51.95, "MasakhaNEWSClusteringS2S (xho)": 21.26, "MasakhaNEWSClusteringS2S (yor)": 28.88, - "MasakhaNEWSClusteringS2S": 30.88, "MedrxivClusteringP2P": 31.96, "MedrxivClusteringS2S": 31.7, "RedditClustering": 45.24, @@ -17527,17 +18676,15 @@ "OpusparcusPC (fr)": 93.45, "OpusparcusPC (ru)": 90.47, "OpusparcusPC (sv)": 95.16, - "OpusparcusPC": 93.45, "PPC": 93.67, "PSC": 98.26, "PawsXPairClassification (de)": 55.71, "PawsXPairClassification (en)": 60.12, "PawsXPairClassification (es)": 56.94, - "PawsXPairClassification (fr)": 58.33, + "PawsXPairClassification (fr)": 58.17, "PawsXPairClassification (ja)": 49.37, "PawsXPairClassification (ko)": 50.78, "PawsXPairClassification (zh)": 55.47, - "PawsXPairClassification": 58.17, "SICK-E-PL": 77.22, "SprintDuplicateQuestions": 91.1, "TERRa": 64.57, @@ -17547,10 +18694,10 @@ { "Model": "paraphrase-multilingual-mpnet-base-v2", "CDSC-E": 75.76, - "OpusparcusPC": 93.45, + "OpusparcusPC (fr)": 93.45, "PPC": 93.67, "PSC": 98.26, - "PawsXPairClassification": 58.14, + "PawsXPairClassification (fr)": 58.14, "SICK-E-PL": 77.22, "SprintDuplicateQuestions": 90.55, "TwitterSemEval2015": 66.75, @@ -17640,7 +18787,6 @@ "MintakaRetrieval (it)": 25.62, "MintakaRetrieval (ja)": 15.46, "MintakaRetrieval (pt)": 26.15, - "MintakaRetrieval": 24.45, "NFCorpus": 25.49, "NFCorpus-PL": 18.53, "NQ": 33.6, @@ -17709,7 +18855,7 @@ "XPQARetrieval (cmn-cmn)": 42.54, "XPQARetrieval (eng-cmn)": 20.91, "XPQARetrieval (cmn-eng)": 42.81, - "XPQARetrieval": 46.22 + "XPQARetrieval (fr)": 46.22 } ] }, @@ -17788,10 +18934,21 @@ "STS14": 80.81, "STS15": 87.48, "STS16": 83.2, - "STS17": 82.51, - "STS22": 74.3, + "STS17 (ar-ar)": 79.1, + "STS17 (en-ar)": 80.85, + "STS17 (en-de)": 83.28, + "STS17 (en-en)": 86.99, + "STS17 (en-tr)": 74.9, + "STS17 (es-en)": 86.11, + "STS17 (es-es)": 85.14, + "STS17 (fr-en)": 81.17, + "STS17 (it-en)": 84.24, + "STS17 (ko-ko)": 83.41, + "STS17 (nl-en)": 82.51, + "STS22 (pl)": 33.64, + "STS22 (fr)": 74.3, "STSBenchmark": 86.82, - "STSBenchmarkMultilingualSTS": 84.69 + "STSBenchmarkMultilingualSTS (fr)": 84.69 } ] }, @@ -18647,8 +19804,12 @@ "DKHateClassification": 59.36, "DanishPoliticalCommentsClassification": 28.32, "LccSentimentClassification": 47.2, - "MassiveIntentClassification": 69.11, - "MassiveScenarioClassification": 75.96, + "MassiveIntentClassification (da)": 42.84, + "MassiveIntentClassification (nb)": 42.74, + "MassiveIntentClassification (sv)": 69.11, + "MassiveScenarioClassification (da)": 49.64, + "MassiveScenarioClassification (nb)": 49.49, + "MassiveScenarioClassification (sv)": 75.96, "NoRecClassification": 43.53, "NordicLangClassification": 51.45, "NorwegianParliament": 55.74, @@ -18690,12 +19851,12 @@ "accuracy": [ { "Model": "sentence-camembert-base", - "AmazonReviewsClassification": 36.03, - "MTOPDomainClassification": 77.1, - "MTOPIntentClassification": 43.44, - "MasakhaNEWSClassification": 70.36, - "MassiveIntentClassification": 51.59, - "MassiveScenarioClassification": 61.28 + "AmazonReviewsClassification (fr)": 36.03, + "MTOPDomainClassification (fr)": 77.1, + "MTOPIntentClassification (fr)": 43.44, + "MasakhaNEWSClassification (fra)": 70.36, + "MassiveIntentClassification (fr)": 51.59, + "MassiveScenarioClassification (fr)": 61.28 } ] }, @@ -18708,8 +19869,8 @@ "HALClusteringS2S": 20.22, "MLSUMClusteringP2P": 35.98, "MLSUMClusteringS2S": 27.05, - "MasakhaNEWSClusteringP2P": 36.03, - "MasakhaNEWSClusteringS2S": 30.77 + "MasakhaNEWSClusteringP2P (fra)": 36.03, + "MasakhaNEWSClusteringS2S (fra)": 30.77 } ] }, @@ -18717,13 +19878,13 @@ "max_ap": [ { "Model": "sentence-camembert-base", - "OpusparcusPC": 92.05, - "PawsXPairClassification": 57.44 + "OpusparcusPC (fr)": 92.05, + "PawsXPairClassification (fr)": 57.44 }, { "Model": "sentence-camembert-base", - "OpusparcusPC": 92.05, - "PawsXPairClassification": 57.44 + "OpusparcusPC (fr)": 92.05, + "PawsXPairClassification (fr)": 57.44 } ] }, @@ -18742,9 +19903,9 @@ "Model": "sentence-camembert-base", "AlloprofRetrieval": 21.94, "BSARDRetrieval": 0.0, - "MintakaRetrieval": 13.36, + "MintakaRetrieval (fr)": 13.36, "SyntecRetrieval": 68.62, - "XPQARetrieval": 57.92 + "XPQARetrieval (fr)": 57.92 } ] }, @@ -18753,8 +19914,8 @@ { "Model": "sentence-camembert-base", "SICKFr": 74.18, - "STS22": 77.54, - "STSBenchmarkMultilingualSTS": 81.64 + "STS22 (fr)": 77.54, + "STSBenchmarkMultilingualSTS (fr)": 81.64 } ] }, @@ -18781,12 +19942,12 @@ "accuracy": [ { "Model": "sentence-camembert-large", - "AmazonReviewsClassification": 37.97, - "MTOPDomainClassification": 85.74, - "MTOPIntentClassification": 58.62, - "MasakhaNEWSClassification": 80.62, - "MassiveIntentClassification": 62.65, - "MassiveScenarioClassification": 69.29 + "AmazonReviewsClassification (fr)": 37.97, + "MTOPDomainClassification (fr)": 85.74, + "MTOPIntentClassification (fr)": 58.62, + "MasakhaNEWSClassification (fra)": 80.62, + "MassiveIntentClassification (fr)": 62.65, + "MassiveScenarioClassification (fr)": 69.29 } ] }, @@ -18799,8 +19960,8 @@ "HALClusteringS2S": 23.9, "MLSUMClusteringP2P": 42.04, "MLSUMClusteringS2S": 32.29, - "MasakhaNEWSClusteringP2P": 54.51, - "MasakhaNEWSClusteringS2S": 44.73 + "MasakhaNEWSClusteringP2P (fra)": 54.51, + "MasakhaNEWSClusteringS2S (fra)": 44.73 } ] }, @@ -18808,13 +19969,13 @@ "max_ap": [ { "Model": "sentence-camembert-large", - "OpusparcusPC": 94.63, - "PawsXPairClassification": 59.59 + "OpusparcusPC (fr)": 94.63, + "PawsXPairClassification (fr)": 59.59 }, { "Model": "sentence-camembert-large", - "OpusparcusPC": 94.63, - "PawsXPairClassification": 59.61 + "OpusparcusPC (fr)": 94.63, + "PawsXPairClassification (fr)": 59.61 } ] }, @@ -18833,9 +19994,9 @@ "Model": "sentence-camembert-large", "AlloprofRetrieval": 31.62, "BSARDRetrieval": 0.0, - "MintakaRetrieval": 21.87, + "MintakaRetrieval (fr)": 21.87, "SyntecRetrieval": 81.11, - "XPQARetrieval": 65.62 + "XPQARetrieval (fr)": 65.62 } ] }, @@ -18844,8 +20005,8 @@ { "Model": "sentence-camembert-large", "SICKFr": 77.7, - "STS22": 81.73, - "STSBenchmarkMultilingualSTS": 85.79 + "STS22 (fr)": 81.73, + "STSBenchmarkMultilingualSTS (fr)": 85.79 } ] }, @@ -18872,12 +20033,12 @@ "accuracy": [ { "Model": "sentence-croissant-llm-base", - "AmazonReviewsClassification": 34.79, - "MTOPDomainClassification": 85.52, - "MTOPIntentClassification": 63.12, - "MasakhaNEWSClassification": 79.29, - "MassiveIntentClassification": 59.41, - "MassiveScenarioClassification": 65.29 + "AmazonReviewsClassification (fr)": 34.79, + "MTOPDomainClassification (fr)": 85.52, + "MTOPIntentClassification (fr)": 63.12, + "MasakhaNEWSClassification (fra)": 79.29, + "MassiveIntentClassification (fr)": 59.41, + "MassiveScenarioClassification (fr)": 65.29 } ] }, @@ -18890,8 +20051,8 @@ "HALClusteringS2S": 23.4, "MLSUMClusteringP2P": 42.94, "MLSUMClusteringS2S": 33.91, - "MasakhaNEWSClusteringP2P": 53.94, - "MasakhaNEWSClusteringS2S": 41.05 + "MasakhaNEWSClusteringP2P (fra)": 53.94, + "MasakhaNEWSClusteringS2S (fra)": 41.05 } ] }, @@ -18899,13 +20060,13 @@ "max_ap": [ { "Model": "sentence-croissant-llm-base", - "OpusparcusPC": 91.42, - "PawsXPairClassification": 63.13 + "OpusparcusPC (fr)": 91.42, + "PawsXPairClassification (fr)": 63.13 }, { "Model": "sentence-croissant-llm-base", - "OpusparcusPC": 91.42, - "PawsXPairClassification": 63.19 + "OpusparcusPC (fr)": 91.42, + "PawsXPairClassification (fr)": 63.19 } ] }, @@ -18924,9 +20085,9 @@ "Model": "sentence-croissant-llm-base", "AlloprofRetrieval": 29.97, "BSARDRetrieval": 0.0, - "MintakaRetrieval": 21.31, + "MintakaRetrieval (fr)": 21.31, "SyntecRetrieval": 74.2, - "XPQARetrieval": 58.57 + "XPQARetrieval (fr)": 58.57 } ] }, @@ -18935,8 +20096,8 @@ { "Model": "sentence-croissant-llm-base", "SICKFr": 69.6, - "STS22": 78.77, - "STSBenchmarkMultilingualSTS": 79.23 + "STS22 (fr)": 78.77, + "STSBenchmarkMultilingualSTS (fr)": 79.23 } ] }, @@ -18963,17 +20124,135 @@ "accuracy": [ { "Model": "sentence-t5-base", - "AmazonCounterfactualClassification": 46.05, + "AmazonCounterfactualClassification (de)": 69.98, + "AmazonCounterfactualClassification (en)": 75.82, + "AmazonCounterfactualClassification (en-ext)": 76.81, + "AmazonCounterfactualClassification (ja)": 46.05, "AmazonPolarityClassification": 85.12, - "AmazonReviewsClassification": 21.53, + "AmazonReviewsClassification (de)": 37.9, + "AmazonReviewsClassification (en)": 44.94, + "AmazonReviewsClassification (es)": 37.33, + "AmazonReviewsClassification (fr)": 37.35, + "AmazonReviewsClassification (ja)": 22.29, + "AmazonReviewsClassification (zh)": 21.53, "Banking77Classification": 76.48, "EmotionClassification": 51.35, "ImdbClassification": 77.34, - "MTOPDomainClassification": 16.21, - "MTOPIntentClassification": 5.21, - "MasakhaNEWSClassification": 81.21, - "MassiveIntentClassification": 4.63, - "MassiveScenarioClassification": 7.91, + "MTOPDomainClassification (de)": 76.98, + "MTOPDomainClassification (en)": 90.34, + "MTOPDomainClassification (es)": 73.61, + "MTOPDomainClassification (fr)": 75.03, + "MTOPDomainClassification (hi)": 21.4, + "MTOPDomainClassification (th)": 16.21, + "MTOPIntentClassification (de)": 44.43, + "MTOPIntentClassification (en)": 63.32, + "MTOPIntentClassification (es)": 42.03, + "MTOPIntentClassification (fr)": 43.85, + "MTOPIntentClassification (hi)": 3.8, + "MTOPIntentClassification (th)": 5.21, + "MasakhaNEWSClassification (fra)": 81.21, + "MassiveIntentClassification (af)": 34.32, + "MassiveIntentClassification (am)": 2.38, + "MassiveIntentClassification (ar)": 4.53, + "MassiveIntentClassification (az)": 31.76, + "MassiveIntentClassification (bn)": 2.58, + "MassiveIntentClassification (cy)": 28.94, + "MassiveIntentClassification (da)": 38.82, + "MassiveIntentClassification (de)": 45.23, + "MassiveIntentClassification (el)": 10.05, + "MassiveIntentClassification (en)": 69.74, + "MassiveIntentClassification (es)": 45.32, + "MassiveIntentClassification (fa)": 3.58, + "MassiveIntentClassification (fi)": 33.52, + "MassiveIntentClassification (fr)": 51.13, + "MassiveIntentClassification (he)": 2.63, + "MassiveIntentClassification (hi)": 2.68, + "MassiveIntentClassification (hu)": 32.31, + "MassiveIntentClassification (hy)": 3.33, + "MassiveIntentClassification (id)": 35.5, + "MassiveIntentClassification (is)": 29.82, + "MassiveIntentClassification (it)": 45.59, + "MassiveIntentClassification (ja)": 3.67, + "MassiveIntentClassification (jv)": 31.15, + "MassiveIntentClassification (ka)": 2.77, + "MassiveIntentClassification (km)": 5.66, + "MassiveIntentClassification (kn)": 2.59, + "MassiveIntentClassification (ko)": 2.34, + "MassiveIntentClassification (lv)": 33.97, + "MassiveIntentClassification (ml)": 2.55, + "MassiveIntentClassification (mn)": 14.7, + "MassiveIntentClassification (ms)": 33.12, + "MassiveIntentClassification (my)": 4.42, + "MassiveIntentClassification (nb)": 38.53, + "MassiveIntentClassification (nl)": 37.96, + "MassiveIntentClassification (pl)": 34.41, + "MassiveIntentClassification (pt)": 43.35, + "MassiveIntentClassification (ro)": 42.69, + "MassiveIntentClassification (ru)": 14.82, + "MassiveIntentClassification (sl)": 34.54, + "MassiveIntentClassification (sq)": 38.54, + "MassiveIntentClassification (sv)": 35.98, + "MassiveIntentClassification (sw)": 32.14, + "MassiveIntentClassification (ta)": 1.41, + "MassiveIntentClassification (te)": 2.5, + "MassiveIntentClassification (th)": 3.71, + "MassiveIntentClassification (tl)": 36.04, + "MassiveIntentClassification (tr)": 33.77, + "MassiveIntentClassification (ur)": 2.99, + "MassiveIntentClassification (vi)": 22.62, + "MassiveIntentClassification (zh-CN)": 1.12, + "MassiveIntentClassification (zh-TW)": 4.63, + "MassiveScenarioClassification (af)": 44.45, + "MassiveScenarioClassification (am)": 7.51, + "MassiveScenarioClassification (ar)": 12.32, + "MassiveScenarioClassification (az)": 38.41, + "MassiveScenarioClassification (bn)": 8.45, + "MassiveScenarioClassification (cy)": 35.04, + "MassiveScenarioClassification (da)": 48.36, + "MassiveScenarioClassification (de)": 59.12, + "MassiveScenarioClassification (el)": 17.68, + "MassiveScenarioClassification (en)": 72.32, + "MassiveScenarioClassification (es)": 55.61, + "MassiveScenarioClassification (fa)": 6.86, + "MassiveScenarioClassification (fi)": 41.34, + "MassiveScenarioClassification (fr)": 59.92, + "MassiveScenarioClassification (he)": 7.86, + "MassiveScenarioClassification (hi)": 7.63, + "MassiveScenarioClassification (hu)": 41.31, + "MassiveScenarioClassification (hy)": 9.23, + "MassiveScenarioClassification (id)": 44.64, + "MassiveScenarioClassification (is)": 39.63, + "MassiveScenarioClassification (it)": 54.58, + "MassiveScenarioClassification (ja)": 4.96, + "MassiveScenarioClassification (jv)": 40.73, + "MassiveScenarioClassification (ka)": 7.51, + "MassiveScenarioClassification (km)": 8.73, + "MassiveScenarioClassification (kn)": 7.99, + "MassiveScenarioClassification (ko)": 6.03, + "MassiveScenarioClassification (lv)": 36.42, + "MassiveScenarioClassification (ml)": 6.96, + "MassiveScenarioClassification (mn)": 19.85, + "MassiveScenarioClassification (ms)": 43.18, + "MassiveScenarioClassification (my)": 9.46, + "MassiveScenarioClassification (nb)": 46.6, + "MassiveScenarioClassification (nl)": 50.0, + "MassiveScenarioClassification (pl)": 42.3, + "MassiveScenarioClassification (pt)": 52.24, + "MassiveScenarioClassification (ro)": 53.7, + "MassiveScenarioClassification (ru)": 20.69, + "MassiveScenarioClassification (sl)": 39.79, + "MassiveScenarioClassification (sq)": 50.16, + "MassiveScenarioClassification (sv)": 46.69, + "MassiveScenarioClassification (sw)": 40.48, + "MassiveScenarioClassification (ta)": 7.47, + "MassiveScenarioClassification (te)": 6.87, + "MassiveScenarioClassification (th)": 8.26, + "MassiveScenarioClassification (tl)": 48.94, + "MassiveScenarioClassification (tr)": 41.83, + "MassiveScenarioClassification (ur)": 9.77, + "MassiveScenarioClassification (vi)": 30.01, + "MassiveScenarioClassification (zh-CN)": 4.17, + "MassiveScenarioClassification (zh-TW)": 7.91, "ToxicConversationsClassification": 68.2, "TweetSentimentExtractionClassification": 62.71 } @@ -18994,8 +20273,8 @@ "HALClusteringS2S": 17.72, "MLSUMClusteringP2P": 40.77, "MLSUMClusteringS2S": 30.06, - "MasakhaNEWSClusteringP2P": 61.9, - "MasakhaNEWSClusteringS2S": 35.64, + "MasakhaNEWSClusteringP2P (fra)": 61.9, + "MasakhaNEWSClusteringS2S (fra)": 35.64, "MedrxivClusteringP2P": 33.2, "MedrxivClusteringS2S": 26.13, "RedditClustering": 52.93, @@ -19012,16 +20291,16 @@ "max_ap": [ { "Model": "sentence-t5-base", - "OpusparcusPC": 89.4, - "PawsXPairClassification": 55.35, + "OpusparcusPC (fr)": 89.4, + "PawsXPairClassification (fr)": 55.35, "SprintDuplicateQuestions": 91.23, "TwitterSemEval2015": 78.25, "TwitterURLCorpus": 86.05 }, { "Model": "sentence-t5-base", - "OpusparcusPC": 89.41, - "PawsXPairClassification": 55.43, + "OpusparcusPC (fr)": 89.41, + "PawsXPairClassification (fr)": 55.43, "SprintDuplicateQuestions": 91.23, "TwitterSemEval2015": 78.25, "TwitterURLCorpus": 86.05 @@ -19055,7 +20334,7 @@ "FiQA2018": 34.83, "HotpotQA": 33.2, "MSMARCO": 20.7, - "MintakaRetrieval": 21.04, + "MintakaRetrieval (fr)": 21.04, "NFCorpus": 28.65, "NQ": 36.32, "QuoraRetrieval": 85.49, @@ -19064,7 +20343,7 @@ "SyntecRetrieval": 67.0, "TRECCOVID": 40.7, "Touche2020": 20.3, - "XPQARetrieval": 45.19 + "XPQARetrieval (fr)": 45.19 } ] }, @@ -19080,10 +20359,37 @@ "STS14": 82.19, "STS15": 87.46, "STS16": 84.03, - "STS17": 36.46, - "STS22": 20.15, + "STS17 (ar-ar)": 13.36, + "STS17 (en-ar)": -5.65, + "STS17 (en-de)": 67.11, + "STS17 (en-en)": 89.57, + "STS17 (en-tr)": -0.02, + "STS17 (es-en)": 47.72, + "STS17 (es-es)": 79.94, + "STS17 (fr-en)": 56.61, + "STS17 (it-en)": 30.46, + "STS17 (ko-ko)": 10.06, + "STS17 (nl-en)": 36.46, + "STS22 (ar)": 31.2, + "STS22 (de)": 42.08, + "STS22 (de-en)": 46.9, + "STS22 (de-fr)": 55.04, + "STS22 (de-pl)": 33.94, + "STS22 (en)": 62.66, + "STS22 (es)": 53.81, + "STS22 (es-en)": 65.19, + "STS22 (es-it)": 55.29, + "STS22 (fr)": 77.69, + "STS22 (fr-pl)": 28.17, + "STS22 (it)": 60.65, + "STS22 (pl)": 24.42, + "STS22 (pl-en)": 42.97, + "STS22 (ru)": 12.13, + "STS22 (tr)": 40.45, + "STS22 (zh)": 32.9, + "STS22 (zh-en)": 20.15, "STSBenchmark": 85.52, - "STSBenchmarkMultilingualSTS": 74.04 + "STSBenchmarkMultilingualSTS (fr)": 74.04 } ] }, @@ -19108,8 +20414,122 @@ "f1": [ { "Model": "sentence-t5-large", - "BUCC": 0.95, - "Tatoeba": 14.67 + "BUCC (de-en)": 87.0, + "BUCC (fr-en)": 88.91, + "BUCC (ru-en)": 0.44, + "BUCC (zh-en)": 0.95, + "Tatoeba (afr-eng)": 23.7, + "Tatoeba (amh-eng)": 0.65, + "Tatoeba (ang-eng)": 30.98, + "Tatoeba (ara-eng)": 0.48, + "Tatoeba (arq-eng)": 0.68, + "Tatoeba (arz-eng)": 0.22, + "Tatoeba (ast-eng)": 55.3, + "Tatoeba (awa-eng)": 1.03, + "Tatoeba (aze-eng)": 5.83, + "Tatoeba (bel-eng)": 1.66, + "Tatoeba (ben-eng)": 0.0, + "Tatoeba (ber-eng)": 5.62, + "Tatoeba (bos-eng)": 12.23, + "Tatoeba (bre-eng)": 5.84, + "Tatoeba (bul-eng)": 1.35, + "Tatoeba (cat-eng)": 48.56, + "Tatoeba (cbk-eng)": 46.97, + "Tatoeba (ceb-eng)": 9.79, + "Tatoeba (ces-eng)": 6.0, + "Tatoeba (cha-eng)": 24.21, + "Tatoeba (cmn-eng)": 2.26, + "Tatoeba (cor-eng)": 4.03, + "Tatoeba (csb-eng)": 9.53, + "Tatoeba (cym-eng)": 9.17, + "Tatoeba (dan-eng)": 34.63, + "Tatoeba (deu-eng)": 89.31, + "Tatoeba (dsb-eng)": 9.68, + "Tatoeba (dtp-eng)": 4.66, + "Tatoeba (ell-eng)": 0.77, + "Tatoeba (epo-eng)": 26.88, + "Tatoeba (est-eng)": 5.19, + "Tatoeba (eus-eng)": 9.46, + "Tatoeba (fao-eng)": 21.59, + "Tatoeba (fin-eng)": 5.66, + "Tatoeba (fra-eng)": 79.71, + "Tatoeba (fry-eng)": 28.29, + "Tatoeba (gla-eng)": 2.34, + "Tatoeba (gle-eng)": 3.55, + "Tatoeba (glg-eng)": 56.25, + "Tatoeba (gsw-eng)": 24.25, + "Tatoeba (heb-eng)": 0.57, + "Tatoeba (hin-eng)": 0.12, + "Tatoeba (hrv-eng)": 10.29, + "Tatoeba (hsb-eng)": 9.52, + "Tatoeba (hun-eng)": 6.22, + "Tatoeba (hye-eng)": 0.81, + "Tatoeba (ido-eng)": 41.11, + "Tatoeba (ile-eng)": 54.0, + "Tatoeba (ina-eng)": 75.47, + "Tatoeba (ind-eng)": 13.02, + "Tatoeba (isl-eng)": 8.98, + "Tatoeba (ita-eng)": 67.23, + "Tatoeba (jav-eng)": 8.54, + "Tatoeba (jpn-eng)": 0.99, + "Tatoeba (kab-eng)": 1.85, + "Tatoeba (kat-eng)": 1.37, + "Tatoeba (kaz-eng)": 0.67, + "Tatoeba (khm-eng)": 0.56, + "Tatoeba (kor-eng)": 1.73, + "Tatoeba (kur-eng)": 9.23, + "Tatoeba (kzj-eng)": 5.38, + "Tatoeba (lat-eng)": 21.3, + "Tatoeba (lfn-eng)": 40.48, + "Tatoeba (lit-eng)": 5.38, + "Tatoeba (lvs-eng)": 6.83, + "Tatoeba (mal-eng)": 0.45, + "Tatoeba (mar-eng)": 0.01, + "Tatoeba (max-eng)": 16.44, + "Tatoeba (mhr-eng)": 0.33, + "Tatoeba (mkd-eng)": 0.4, + "Tatoeba (mon-eng)": 2.48, + "Tatoeba (nds-eng)": 34.66, + "Tatoeba (nld-eng)": 42.72, + "Tatoeba (nno-eng)": 24.08, + "Tatoeba (nob-eng)": 34.17, + "Tatoeba (nov-eng)": 55.01, + "Tatoeba (oci-eng)": 29.15, + "Tatoeba (orv-eng)": 0.2, + "Tatoeba (pam-eng)": 6.99, + "Tatoeba (pes-eng)": 0.9, + "Tatoeba (pms-eng)": 30.8, + "Tatoeba (pol-eng)": 12.81, + "Tatoeba (por-eng)": 73.45, + "Tatoeba (ron-eng)": 54.86, + "Tatoeba (rus-eng)": 2.43, + "Tatoeba (slk-eng)": 8.35, + "Tatoeba (slv-eng)": 9.3, + "Tatoeba (spa-eng)": 78.87, + "Tatoeba (sqi-eng)": 11.74, + "Tatoeba (srp-eng)": 5.83, + "Tatoeba (swe-eng)": 35.41, + "Tatoeba (swg-eng)": 28.18, + "Tatoeba (swh-eng)": 7.53, + "Tatoeba (tam-eng)": 0.36, + "Tatoeba (tat-eng)": 1.01, + "Tatoeba (tel-eng)": 1.1, + "Tatoeba (tgl-eng)": 12.4, + "Tatoeba (tha-eng)": 1.58, + "Tatoeba (tuk-eng)": 4.95, + "Tatoeba (tur-eng)": 6.45, + "Tatoeba (tzl-eng)": 37.82, + "Tatoeba (uig-eng)": 0.67, + "Tatoeba (ukr-eng)": 1.88, + "Tatoeba (urd-eng)": 0.0, + "Tatoeba (uzb-eng)": 4.79, + "Tatoeba (vie-eng)": 7.03, + "Tatoeba (war-eng)": 9.68, + "Tatoeba (wuu-eng)": 1.28, + "Tatoeba (xho-eng)": 10.64, + "Tatoeba (yid-eng)": 0.57, + "Tatoeba (yue-eng)": 0.88, + "Tatoeba (zsm-eng)": 14.67 } ] }, @@ -19117,17 +20537,135 @@ "accuracy": [ { "Model": "sentence-t5-large", - "AmazonCounterfactualClassification": 45.72, + "AmazonCounterfactualClassification (de)": 67.97, + "AmazonCounterfactualClassification (en)": 75.51, + "AmazonCounterfactualClassification (en-ext)": 75.44, + "AmazonCounterfactualClassification (ja)": 45.72, "AmazonPolarityClassification": 92.87, - "AmazonReviewsClassification": 22.12, + "AmazonReviewsClassification (de)": 43.16, + "AmazonReviewsClassification (en)": 47.12, + "AmazonReviewsClassification (es)": 42.89, + "AmazonReviewsClassification (fr)": 41.48, + "AmazonReviewsClassification (ja)": 22.49, + "AmazonReviewsClassification (zh)": 22.12, "Banking77Classification": 78.46, "EmotionClassification": 51.74, "ImdbClassification": 87.01, - "MTOPDomainClassification": 15.82, - "MTOPIntentClassification": 4.96, - "MasakhaNEWSClassification": 80.43, - "MassiveIntentClassification": 3.49, - "MassiveScenarioClassification": 7.24, + "MTOPDomainClassification (de)": 80.56, + "MTOPDomainClassification (en)": 90.99, + "MTOPDomainClassification (es)": 80.78, + "MTOPDomainClassification (fr)": 79.6, + "MTOPDomainClassification (hi)": 21.22, + "MTOPDomainClassification (th)": 15.82, + "MTOPIntentClassification (de)": 52.5, + "MTOPIntentClassification (en)": 64.98, + "MTOPIntentClassification (es)": 52.07, + "MTOPIntentClassification (fr)": 47.73, + "MTOPIntentClassification (hi)": 3.74, + "MTOPIntentClassification (th)": 4.96, + "MasakhaNEWSClassification (fra)": 80.43, + "MassiveIntentClassification (af)": 38.41, + "MassiveIntentClassification (am)": 2.49, + "MassiveIntentClassification (ar)": 4.7, + "MassiveIntentClassification (az)": 31.77, + "MassiveIntentClassification (bn)": 2.77, + "MassiveIntentClassification (cy)": 31.69, + "MassiveIntentClassification (da)": 41.76, + "MassiveIntentClassification (de)": 52.01, + "MassiveIntentClassification (el)": 9.74, + "MassiveIntentClassification (en)": 71.78, + "MassiveIntentClassification (es)": 54.1, + "MassiveIntentClassification (fa)": 3.86, + "MassiveIntentClassification (fi)": 34.07, + "MassiveIntentClassification (fr)": 57.01, + "MassiveIntentClassification (he)": 2.14, + "MassiveIntentClassification (hi)": 2.97, + "MassiveIntentClassification (hu)": 32.01, + "MassiveIntentClassification (hy)": 3.17, + "MassiveIntentClassification (id)": 34.55, + "MassiveIntentClassification (is)": 32.0, + "MassiveIntentClassification (it)": 52.94, + "MassiveIntentClassification (ja)": 2.9, + "MassiveIntentClassification (jv)": 32.42, + "MassiveIntentClassification (ka)": 2.71, + "MassiveIntentClassification (km)": 5.5, + "MassiveIntentClassification (kn)": 2.41, + "MassiveIntentClassification (ko)": 2.57, + "MassiveIntentClassification (lv)": 35.09, + "MassiveIntentClassification (ml)": 2.95, + "MassiveIntentClassification (mn)": 18.33, + "MassiveIntentClassification (ms)": 29.69, + "MassiveIntentClassification (my)": 3.99, + "MassiveIntentClassification (nb)": 41.29, + "MassiveIntentClassification (nl)": 44.95, + "MassiveIntentClassification (pl)": 37.67, + "MassiveIntentClassification (pt)": 51.96, + "MassiveIntentClassification (ro)": 43.83, + "MassiveIntentClassification (ru)": 17.32, + "MassiveIntentClassification (sl)": 33.71, + "MassiveIntentClassification (sq)": 37.62, + "MassiveIntentClassification (sv)": 40.67, + "MassiveIntentClassification (sw)": 31.9, + "MassiveIntentClassification (ta)": 1.91, + "MassiveIntentClassification (te)": 2.54, + "MassiveIntentClassification (th)": 3.85, + "MassiveIntentClassification (tl)": 36.83, + "MassiveIntentClassification (tr)": 33.0, + "MassiveIntentClassification (ur)": 2.62, + "MassiveIntentClassification (vi)": 22.81, + "MassiveIntentClassification (zh-CN)": 1.09, + "MassiveIntentClassification (zh-TW)": 3.49, + "MassiveScenarioClassification (af)": 50.28, + "MassiveScenarioClassification (am)": 7.15, + "MassiveScenarioClassification (ar)": 12.12, + "MassiveScenarioClassification (az)": 39.68, + "MassiveScenarioClassification (bn)": 8.06, + "MassiveScenarioClassification (cy)": 38.01, + "MassiveScenarioClassification (da)": 51.44, + "MassiveScenarioClassification (de)": 62.71, + "MassiveScenarioClassification (el)": 17.19, + "MassiveScenarioClassification (en)": 73.16, + "MassiveScenarioClassification (es)": 59.56, + "MassiveScenarioClassification (fa)": 6.5, + "MassiveScenarioClassification (fi)": 41.72, + "MassiveScenarioClassification (fr)": 63.6, + "MassiveScenarioClassification (he)": 7.93, + "MassiveScenarioClassification (hi)": 7.85, + "MassiveScenarioClassification (hu)": 41.37, + "MassiveScenarioClassification (hy)": 9.42, + "MassiveScenarioClassification (id)": 44.88, + "MassiveScenarioClassification (is)": 40.86, + "MassiveScenarioClassification (it)": 60.09, + "MassiveScenarioClassification (ja)": 6.56, + "MassiveScenarioClassification (jv)": 40.18, + "MassiveScenarioClassification (ka)": 7.37, + "MassiveScenarioClassification (km)": 9.56, + "MassiveScenarioClassification (kn)": 8.4, + "MassiveScenarioClassification (ko)": 5.96, + "MassiveScenarioClassification (lv)": 41.44, + "MassiveScenarioClassification (ml)": 7.47, + "MassiveScenarioClassification (mn)": 25.36, + "MassiveScenarioClassification (ms)": 39.69, + "MassiveScenarioClassification (my)": 9.68, + "MassiveScenarioClassification (nb)": 49.92, + "MassiveScenarioClassification (nl)": 56.09, + "MassiveScenarioClassification (pl)": 45.2, + "MassiveScenarioClassification (pt)": 57.99, + "MassiveScenarioClassification (ro)": 56.0, + "MassiveScenarioClassification (ru)": 27.47, + "MassiveScenarioClassification (sl)": 41.04, + "MassiveScenarioClassification (sq)": 49.38, + "MassiveScenarioClassification (sv)": 50.97, + "MassiveScenarioClassification (sw)": 40.62, + "MassiveScenarioClassification (ta)": 7.59, + "MassiveScenarioClassification (te)": 7.07, + "MassiveScenarioClassification (th)": 8.52, + "MassiveScenarioClassification (tl)": 49.89, + "MassiveScenarioClassification (tr)": 43.08, + "MassiveScenarioClassification (ur)": 9.31, + "MassiveScenarioClassification (vi)": 27.46, + "MassiveScenarioClassification (zh-CN)": 4.7, + "MassiveScenarioClassification (zh-TW)": 7.24, "ToxicConversationsClassification": 71.73, "TweetSentimentExtractionClassification": 62.33 } @@ -19148,8 +20686,8 @@ "HALClusteringS2S": 18.73, "MLSUMClusteringP2P": 42.07, "MLSUMClusteringS2S": 31.87, - "MasakhaNEWSClusteringP2P": 58.6, - "MasakhaNEWSClusteringS2S": 31.33, + "MasakhaNEWSClusteringP2P (fra)": 58.6, + "MasakhaNEWSClusteringS2S (fra)": 31.33, "MedrxivClusteringP2P": 32.4, "MedrxivClusteringS2S": 26.33, "RedditClustering": 54.53, @@ -19166,16 +20704,16 @@ "max_ap": [ { "Model": "sentence-t5-large", - "OpusparcusPC": 91.19, - "PawsXPairClassification": 59.59, + "OpusparcusPC (fr)": 91.19, + "PawsXPairClassification (fr)": 59.59, "SprintDuplicateQuestions": 89.01, "TwitterSemEval2015": 79.75, "TwitterURLCorpus": 86.14 }, { "Model": "sentence-t5-large", - "OpusparcusPC": 91.19, - "PawsXPairClassification": 59.69, + "OpusparcusPC (fr)": 91.19, + "PawsXPairClassification (fr)": 59.69, "SprintDuplicateQuestions": 89.02, "TwitterSemEval2015": 79.75, "TwitterURLCorpus": 86.14 @@ -19209,7 +20747,7 @@ "FiQA2018": 43.55, "HotpotQA": 33.95, "MSMARCO": 23.96, - "MintakaRetrieval": 23.92, + "MintakaRetrieval (fr)": 23.92, "NFCorpus": 31.1, "NQ": 42.02, "QuoraRetrieval": 85.73, @@ -19218,7 +20756,7 @@ "SyntecRetrieval": 71.05, "TRECCOVID": 46.11, "Touche2020": 21.63, - "XPQARetrieval": 48.79 + "XPQARetrieval (fr)": 48.79 } ] }, @@ -19234,10 +20772,37 @@ "STS14": 83.17, "STS15": 88.28, "STS16": 84.36, - "STS17": 45.95, - "STS22": 23.1, + "STS17 (ar-ar)": 10.75, + "STS17 (en-ar)": -4.71, + "STS17 (en-de)": 73.62, + "STS17 (en-en)": 88.99, + "STS17 (en-tr)": -0.42, + "STS17 (es-en)": 62.62, + "STS17 (es-es)": 82.74, + "STS17 (fr-en)": 67.86, + "STS17 (it-en)": 51.86, + "STS17 (ko-ko)": 9.44, + "STS17 (nl-en)": 45.95, + "STS22 (ar)": 27.01, + "STS22 (de)": 43.73, + "STS22 (de-en)": 49.93, + "STS22 (de-fr)": 61.58, + "STS22 (de-pl)": 38.83, + "STS22 (en)": 62.39, + "STS22 (es)": 57.68, + "STS22 (es-en)": 68.09, + "STS22 (es-it)": 61.58, + "STS22 (fr)": 75.01, + "STS22 (fr-pl)": 5.63, + "STS22 (it)": 62.01, + "STS22 (pl)": 25.0, + "STS22 (pl-en)": 51.72, + "STS22 (ru)": 14.21, + "STS22 (tr)": 47.3, + "STS22 (zh)": 30.47, + "STS22 (zh-en)": 23.1, "STSBenchmark": 85.36, - "STSBenchmarkMultilingualSTS": 77.59 + "STSBenchmarkMultilingualSTS (fr)": 77.59 } ] }, @@ -19262,8 +20827,122 @@ "f1": [ { "Model": "sentence-t5-xl", - "BUCC": 1.3, - "Tatoeba": 35.71 + "BUCC (de-en)": 95.04, + "BUCC (fr-en)": 94.96, + "BUCC (ru-en)": 8.33, + "BUCC (zh-en)": 1.3, + "Tatoeba (afr-eng)": 41.84, + "Tatoeba (amh-eng)": 0.03, + "Tatoeba (ang-eng)": 37.87, + "Tatoeba (ara-eng)": 0.61, + "Tatoeba (arq-eng)": 0.74, + "Tatoeba (arz-eng)": 0.42, + "Tatoeba (ast-eng)": 65.41, + "Tatoeba (awa-eng)": 1.46, + "Tatoeba (aze-eng)": 8.79, + "Tatoeba (bel-eng)": 5.76, + "Tatoeba (ben-eng)": 0.01, + "Tatoeba (ber-eng)": 5.92, + "Tatoeba (bos-eng)": 16.12, + "Tatoeba (bre-eng)": 6.12, + "Tatoeba (bul-eng)": 9.06, + "Tatoeba (cat-eng)": 57.4, + "Tatoeba (cbk-eng)": 57.68, + "Tatoeba (ceb-eng)": 12.56, + "Tatoeba (ces-eng)": 9.47, + "Tatoeba (cha-eng)": 27.13, + "Tatoeba (cmn-eng)": 1.82, + "Tatoeba (cor-eng)": 3.87, + "Tatoeba (csb-eng)": 14.41, + "Tatoeba (cym-eng)": 6.69, + "Tatoeba (dan-eng)": 54.87, + "Tatoeba (deu-eng)": 93.72, + "Tatoeba (dsb-eng)": 14.74, + "Tatoeba (dtp-eng)": 5.84, + "Tatoeba (ell-eng)": 0.6, + "Tatoeba (epo-eng)": 30.8, + "Tatoeba (est-eng)": 5.39, + "Tatoeba (eus-eng)": 11.9, + "Tatoeba (fao-eng)": 28.08, + "Tatoeba (fin-eng)": 6.81, + "Tatoeba (fra-eng)": 85.29, + "Tatoeba (fry-eng)": 38.68, + "Tatoeba (gla-eng)": 2.96, + "Tatoeba (gle-eng)": 3.74, + "Tatoeba (glg-eng)": 70.0, + "Tatoeba (gsw-eng)": 30.49, + "Tatoeba (heb-eng)": 0.87, + "Tatoeba (hin-eng)": 0.1, + "Tatoeba (hrv-eng)": 17.43, + "Tatoeba (hsb-eng)": 14.69, + "Tatoeba (hun-eng)": 7.28, + "Tatoeba (hye-eng)": 0.77, + "Tatoeba (ido-eng)": 46.65, + "Tatoeba (ile-eng)": 59.43, + "Tatoeba (ina-eng)": 82.71, + "Tatoeba (ind-eng)": 37.26, + "Tatoeba (isl-eng)": 11.21, + "Tatoeba (ita-eng)": 79.77, + "Tatoeba (jav-eng)": 7.81, + "Tatoeba (jpn-eng)": 0.91, + "Tatoeba (kab-eng)": 2.23, + "Tatoeba (kat-eng)": 1.48, + "Tatoeba (kaz-eng)": 1.77, + "Tatoeba (khm-eng)": 0.38, + "Tatoeba (kor-eng)": 1.96, + "Tatoeba (kur-eng)": 12.11, + "Tatoeba (kzj-eng)": 6.13, + "Tatoeba (lat-eng)": 27.84, + "Tatoeba (lfn-eng)": 45.89, + "Tatoeba (lit-eng)": 5.94, + "Tatoeba (lvs-eng)": 8.11, + "Tatoeba (mal-eng)": 0.59, + "Tatoeba (mar-eng)": 0.03, + "Tatoeba (max-eng)": 21.7, + "Tatoeba (mhr-eng)": 0.68, + "Tatoeba (mkd-eng)": 5.92, + "Tatoeba (mon-eng)": 2.39, + "Tatoeba (nds-eng)": 45.04, + "Tatoeba (nld-eng)": 64.75, + "Tatoeba (nno-eng)": 36.74, + "Tatoeba (nob-eng)": 54.77, + "Tatoeba (nov-eng)": 57.12, + "Tatoeba (oci-eng)": 34.39, + "Tatoeba (orv-eng)": 2.04, + "Tatoeba (pam-eng)": 8.34, + "Tatoeba (pes-eng)": 0.87, + "Tatoeba (pms-eng)": 38.06, + "Tatoeba (pol-eng)": 28.35, + "Tatoeba (por-eng)": 83.61, + "Tatoeba (ron-eng)": 65.27, + "Tatoeba (rus-eng)": 30.42, + "Tatoeba (slk-eng)": 13.19, + "Tatoeba (slv-eng)": 13.49, + "Tatoeba (spa-eng)": 89.18, + "Tatoeba (sqi-eng)": 14.66, + "Tatoeba (srp-eng)": 13.24, + "Tatoeba (swe-eng)": 60.67, + "Tatoeba (swg-eng)": 34.76, + "Tatoeba (swh-eng)": 8.07, + "Tatoeba (tam-eng)": 0.36, + "Tatoeba (tat-eng)": 1.46, + "Tatoeba (tel-eng)": 0.67, + "Tatoeba (tgl-eng)": 25.22, + "Tatoeba (tha-eng)": 1.58, + "Tatoeba (tuk-eng)": 4.99, + "Tatoeba (tur-eng)": 7.72, + "Tatoeba (tzl-eng)": 38.49, + "Tatoeba (uig-eng)": 0.87, + "Tatoeba (ukr-eng)": 9.12, + "Tatoeba (urd-eng)": 0.0, + "Tatoeba (uzb-eng)": 5.48, + "Tatoeba (vie-eng)": 8.45, + "Tatoeba (war-eng)": 13.75, + "Tatoeba (wuu-eng)": 1.44, + "Tatoeba (xho-eng)": 9.15, + "Tatoeba (yid-eng)": 0.28, + "Tatoeba (yue-eng)": 0.98, + "Tatoeba (zsm-eng)": 35.71 } ] }, @@ -19271,17 +20950,135 @@ "accuracy": [ { "Model": "sentence-t5-xl", - "AmazonCounterfactualClassification": 45.61, + "AmazonCounterfactualClassification (de)": 67.01, + "AmazonCounterfactualClassification (en)": 76.01, + "AmazonCounterfactualClassification (en-ext)": 77.29, + "AmazonCounterfactualClassification (ja)": 45.61, "AmazonPolarityClassification": 93.17, - "AmazonReviewsClassification": 21.88, + "AmazonReviewsClassification (de)": 44.05, + "AmazonReviewsClassification (en)": 48.18, + "AmazonReviewsClassification (es)": 45.01, + "AmazonReviewsClassification (fr)": 43.52, + "AmazonReviewsClassification (ja)": 22.23, + "AmazonReviewsClassification (zh)": 21.88, "Banking77Classification": 80.88, "EmotionClassification": 51.95, "ImdbClassification": 87.54, - "MTOPDomainClassification": 15.62, - "MTOPIntentClassification": 5.08, - "MasakhaNEWSClassification": 80.09, - "MassiveIntentClassification": 3.24, - "MassiveScenarioClassification": 7.14, + "MTOPDomainClassification (de)": 83.28, + "MTOPDomainClassification (en)": 90.73, + "MTOPDomainClassification (es)": 85.32, + "MTOPDomainClassification (fr)": 85.14, + "MTOPDomainClassification (hi)": 20.85, + "MTOPDomainClassification (th)": 15.62, + "MTOPIntentClassification (de)": 54.65, + "MTOPIntentClassification (en)": 68.15, + "MTOPIntentClassification (es)": 57.38, + "MTOPIntentClassification (fr)": 54.39, + "MTOPIntentClassification (hi)": 3.28, + "MTOPIntentClassification (th)": 5.08, + "MasakhaNEWSClassification (fra)": 80.09, + "MassiveIntentClassification (af)": 40.17, + "MassiveIntentClassification (am)": 2.18, + "MassiveIntentClassification (ar)": 4.18, + "MassiveIntentClassification (az)": 30.02, + "MassiveIntentClassification (bn)": 2.6, + "MassiveIntentClassification (cy)": 29.15, + "MassiveIntentClassification (da)": 47.69, + "MassiveIntentClassification (de)": 57.43, + "MassiveIntentClassification (el)": 9.96, + "MassiveIntentClassification (en)": 72.09, + "MassiveIntentClassification (es)": 57.97, + "MassiveIntentClassification (fa)": 3.6, + "MassiveIntentClassification (fi)": 34.02, + "MassiveIntentClassification (fr)": 60.99, + "MassiveIntentClassification (he)": 2.51, + "MassiveIntentClassification (hi)": 3.02, + "MassiveIntentClassification (hu)": 31.66, + "MassiveIntentClassification (hy)": 3.32, + "MassiveIntentClassification (id)": 41.53, + "MassiveIntentClassification (is)": 30.25, + "MassiveIntentClassification (it)": 56.57, + "MassiveIntentClassification (ja)": 3.5, + "MassiveIntentClassification (jv)": 31.67, + "MassiveIntentClassification (ka)": 2.79, + "MassiveIntentClassification (km)": 5.43, + "MassiveIntentClassification (kn)": 2.79, + "MassiveIntentClassification (ko)": 2.67, + "MassiveIntentClassification (lv)": 34.25, + "MassiveIntentClassification (ml)": 2.98, + "MassiveIntentClassification (mn)": 20.99, + "MassiveIntentClassification (ms)": 37.43, + "MassiveIntentClassification (my)": 4.02, + "MassiveIntentClassification (nb)": 45.91, + "MassiveIntentClassification (nl)": 50.51, + "MassiveIntentClassification (pl)": 43.95, + "MassiveIntentClassification (pt)": 57.95, + "MassiveIntentClassification (ro)": 49.37, + "MassiveIntentClassification (ru)": 33.46, + "MassiveIntentClassification (sl)": 36.33, + "MassiveIntentClassification (sq)": 37.65, + "MassiveIntentClassification (sv)": 46.35, + "MassiveIntentClassification (sw)": 30.6, + "MassiveIntentClassification (ta)": 1.79, + "MassiveIntentClassification (te)": 2.26, + "MassiveIntentClassification (th)": 4.02, + "MassiveIntentClassification (tl)": 38.92, + "MassiveIntentClassification (tr)": 32.05, + "MassiveIntentClassification (ur)": 2.7, + "MassiveIntentClassification (vi)": 21.47, + "MassiveIntentClassification (zh-CN)": 0.59, + "MassiveIntentClassification (zh-TW)": 3.24, + "MassiveScenarioClassification (af)": 50.81, + "MassiveScenarioClassification (am)": 6.95, + "MassiveScenarioClassification (ar)": 12.32, + "MassiveScenarioClassification (az)": 38.79, + "MassiveScenarioClassification (bn)": 8.0, + "MassiveScenarioClassification (cy)": 33.91, + "MassiveScenarioClassification (da)": 55.79, + "MassiveScenarioClassification (de)": 65.33, + "MassiveScenarioClassification (el)": 16.89, + "MassiveScenarioClassification (en)": 73.26, + "MassiveScenarioClassification (es)": 62.52, + "MassiveScenarioClassification (fa)": 6.08, + "MassiveScenarioClassification (fi)": 43.34, + "MassiveScenarioClassification (fr)": 66.42, + "MassiveScenarioClassification (he)": 7.55, + "MassiveScenarioClassification (hi)": 7.44, + "MassiveScenarioClassification (hu)": 40.85, + "MassiveScenarioClassification (hy)": 9.25, + "MassiveScenarioClassification (id)": 51.92, + "MassiveScenarioClassification (is)": 40.09, + "MassiveScenarioClassification (it)": 62.94, + "MassiveScenarioClassification (ja)": 7.9, + "MassiveScenarioClassification (jv)": 41.33, + "MassiveScenarioClassification (ka)": 7.76, + "MassiveScenarioClassification (km)": 9.19, + "MassiveScenarioClassification (kn)": 8.36, + "MassiveScenarioClassification (ko)": 6.13, + "MassiveScenarioClassification (lv)": 40.7, + "MassiveScenarioClassification (ml)": 6.98, + "MassiveScenarioClassification (mn)": 27.0, + "MassiveScenarioClassification (ms)": 46.9, + "MassiveScenarioClassification (my)": 9.55, + "MassiveScenarioClassification (nb)": 53.43, + "MassiveScenarioClassification (nl)": 59.65, + "MassiveScenarioClassification (pl)": 49.87, + "MassiveScenarioClassification (pt)": 62.18, + "MassiveScenarioClassification (ro)": 58.22, + "MassiveScenarioClassification (ru)": 40.73, + "MassiveScenarioClassification (sl)": 43.66, + "MassiveScenarioClassification (sq)": 49.25, + "MassiveScenarioClassification (sv)": 57.17, + "MassiveScenarioClassification (sw)": 40.55, + "MassiveScenarioClassification (ta)": 7.46, + "MassiveScenarioClassification (te)": 7.03, + "MassiveScenarioClassification (th)": 8.52, + "MassiveScenarioClassification (tl)": 51.74, + "MassiveScenarioClassification (tr)": 43.01, + "MassiveScenarioClassification (ur)": 9.61, + "MassiveScenarioClassification (vi)": 28.91, + "MassiveScenarioClassification (zh-CN)": 5.86, + "MassiveScenarioClassification (zh-TW)": 7.14, "ToxicConversationsClassification": 70.95, "TweetSentimentExtractionClassification": 61.21 } @@ -19300,8 +21097,8 @@ "HALClusteringS2S": 20.28, "MLSUMClusteringP2P": 41.61, "MLSUMClusteringS2S": 33.6, - "MasakhaNEWSClusteringP2P": 62.82, - "MasakhaNEWSClusteringS2S": 31.74, + "MasakhaNEWSClusteringP2P (fra)": 62.82, + "MasakhaNEWSClusteringS2S (fra)": 31.74, "MedrxivClusteringP2P": 32.3, "MedrxivClusteringS2S": 26.93, "RedditClustering": 57.03, @@ -19316,16 +21113,16 @@ "max_ap": [ { "Model": "sentence-t5-xl", - "OpusparcusPC": 92.48, - "PawsXPairClassification": 62.52, + "OpusparcusPC (fr)": 92.48, + "PawsXPairClassification (fr)": 62.52, "SprintDuplicateQuestions": 91.44, "TwitterSemEval2015": 80.89, "TwitterURLCorpus": 85.86 }, { "Model": "sentence-t5-xl", - "OpusparcusPC": 92.48, - "PawsXPairClassification": 62.59, + "OpusparcusPC (fr)": 92.48, + "PawsXPairClassification (fr)": 62.59, "SprintDuplicateQuestions": 91.44, "TwitterSemEval2015": 80.89, "TwitterURLCorpus": 85.86 @@ -19359,7 +21156,7 @@ "FiQA2018": 44.71, "HotpotQA": 37.17, "MSMARCO": 25.17, - "MintakaRetrieval": 31.54, + "MintakaRetrieval (fr)": 31.54, "NFCorpus": 33.18, "NQ": 46.29, "QuoraRetrieval": 85.85, @@ -19368,7 +21165,7 @@ "SyntecRetrieval": 74.24, "TRECCOVID": 54.77, "Touche2020": 22.51, - "XPQARetrieval": 52.14 + "XPQARetrieval (fr)": 52.14 } ] }, @@ -19384,10 +21181,37 @@ "STS14": 84.33, "STS15": 88.89, "STS16": 85.31, - "STS17": 66.12, - "STS22": 29.0, + "STS17 (ar-ar)": 11.13, + "STS17 (en-ar)": -3.93, + "STS17 (en-de)": 79.04, + "STS17 (en-en)": 88.91, + "STS17 (en-tr)": 13.61, + "STS17 (es-en)": 71.72, + "STS17 (es-es)": 83.42, + "STS17 (fr-en)": 71.38, + "STS17 (it-en)": 69.5, + "STS17 (ko-ko)": 9.61, + "STS17 (nl-en)": 66.12, + "STS22 (ar)": 29.6, + "STS22 (de)": 47.72, + "STS22 (de-en)": 49.64, + "STS22 (de-fr)": 62.21, + "STS22 (de-pl)": 34.34, + "STS22 (en)": 64.32, + "STS22 (es)": 58.16, + "STS22 (es-en)": 69.15, + "STS22 (es-it)": 65.26, + "STS22 (fr)": 77.49, + "STS22 (fr-pl)": 50.71, + "STS22 (it)": 66.91, + "STS22 (pl)": 27.04, + "STS22 (pl-en)": 58.85, + "STS22 (ru)": 26.63, + "STS22 (tr)": 43.36, + "STS22 (zh)": 33.55, + "STS22 (zh-en)": 29.0, "STSBenchmark": 83.93, - "STSBenchmarkMultilingualSTS": 79.42 + "STSBenchmarkMultilingualSTS (fr)": 79.42 } ] }, @@ -19415,17 +21239,22 @@ "accuracy": [ { "Model": "sentence-t5-xxl", - "AmazonCounterfactualClassification": 77.07, + "AmazonCounterfactualClassification (en)": 77.07, "AmazonPolarityClassification": 92.79, - "AmazonReviewsClassification": 46.09, + "AmazonReviewsClassification (en)": 48.93, + "AmazonReviewsClassification (fr)": 46.09, "Banking77Classification": 82.31, "EmotionClassification": 48.57, "ImdbClassification": 90.23, - "MTOPDomainClassification": 86.2, - "MTOPIntentClassification": 58.33, - "MasakhaNEWSClassification": 79.1, - "MassiveIntentClassification": 65.91, - "MassiveScenarioClassification": 68.53, + "MTOPDomainClassification (en)": 92.49, + "MTOPDomainClassification (fr)": 86.2, + "MTOPIntentClassification (en)": 68.33, + "MTOPIntentClassification (fr)": 58.33, + "MasakhaNEWSClassification (fra)": 79.1, + "MassiveIntentClassification (en)": 73.44, + "MassiveIntentClassification (fr)": 65.91, + "MassiveScenarioClassification (en)": 74.82, + "MassiveScenarioClassification (fr)": 68.53, "ToxicConversationsClassification": 70.04, "TweetSentimentExtractionClassification": 62.01 } @@ -19446,8 +21275,8 @@ "HALClusteringS2S": 21.4, "MLSUMClusteringP2P": 42.24, "MLSUMClusteringS2S": 35.25, - "MasakhaNEWSClusteringP2P": 61.15, - "MasakhaNEWSClusteringS2S": 38.24, + "MasakhaNEWSClusteringP2P (fra)": 61.15, + "MasakhaNEWSClusteringS2S (fra)": 38.24, "MedrxivClusteringP2P": 32.09, "MedrxivClusteringS2S": 26.82, "RedditClustering": 58.99, @@ -19464,16 +21293,16 @@ "max_ap": [ { "Model": "sentence-t5-xxl", - "OpusparcusPC": 93.94, - "PawsXPairClassification": 63.98, + "OpusparcusPC (fr)": 93.94, + "PawsXPairClassification (fr)": 63.98, "SprintDuplicateQuestions": 88.89, "TwitterSemEval2015": 80.28, "TwitterURLCorpus": 86.01 }, { "Model": "sentence-t5-xxl", - "OpusparcusPC": 93.94, - "PawsXPairClassification": 64.01, + "OpusparcusPC (fr)": 93.94, + "PawsXPairClassification (fr)": 64.01, "SprintDuplicateQuestions": 88.91, "TwitterSemEval2015": 80.28, "TwitterURLCorpus": 86.01 @@ -19507,7 +21336,7 @@ "FiQA2018": 46.68, "HotpotQA": 42.14, "MSMARCO": 27.67, - "MintakaRetrieval": 34.93, + "MintakaRetrieval (fr)": 34.93, "NFCorpus": 35.08, "NQ": 52.87, "QuoraRetrieval": 85.96, @@ -19516,7 +21345,7 @@ "SyntecRetrieval": 78.97, "TRECCOVID": 59.48, "Touche2020": 21.65, - "XPQARetrieval": 56.2 + "XPQARetrieval (fr)": 56.2 } ] }, @@ -19532,10 +21361,11 @@ "STS14": 84.86, "STS15": 89.32, "STS16": 84.67, - "STS17": 89.46, - "STS22": 76.8, + "STS17 (en-en)": 89.46, + "STS22 (en)": 65.33, + "STS22 (fr)": 76.8, "STSBenchmark": 84.01, - "STSBenchmarkMultilingualSTS": 81.24 + "STSBenchmarkMultilingualSTS (fr)": 81.24 } ] }, @@ -19565,8 +21395,8 @@ "Model": "silver-retriever-base-v1", "AllegroReviews": 33.35, "CBD": 68.51, - "MassiveIntentClassification": 66.63, - "MassiveScenarioClassification": 69.97, + "MassiveIntentClassification (pl)": 66.63, + "MassiveScenarioClassification (pl)": 69.97, "PAC": 66.26, "PolEmo2.0-IN": 63.52, "PolEmo2.0-OUT": 44.7 @@ -19626,7 +21456,7 @@ "Model": "silver-retriever-base-v1", "CDSC-R": 89.09, "SICK-R-PL": 67.26, - "STS22": 38.69 + "STS22 (pl)": 38.69 } ] }, @@ -19650,8 +21480,8 @@ "Model": "st-polish-paraphrase-from-distilroberta", "AllegroReviews": 34.5, "CBD": 70.27, - "MassiveIntentClassification": 64.81, - "MassiveScenarioClassification": 70.01, + "MassiveIntentClassification (pl)": 64.81, + "MassiveScenarioClassification (pl)": 70.01, "PAC": 64.6, "PolEmo2.0-IN": 67.06, "PolEmo2.0-OUT": 38.58 @@ -19711,7 +21541,7 @@ "Model": "st-polish-paraphrase-from-distilroberta", "CDSC-R": 89.62, "SICK-R-PL": 76.37, - "STS22": 40.36 + "STS22 (pl)": 40.36 } ] }, @@ -19735,8 +21565,8 @@ "Model": "st-polish-paraphrase-from-mpnet", "AllegroReviews": 34.55, "CBD": 67.48, - "MassiveIntentClassification": 65.93, - "MassiveScenarioClassification": 71.85, + "MassiveIntentClassification (pl)": 65.93, + "MassiveScenarioClassification (pl)": 71.85, "PAC": 63.25, "PolEmo2.0-IN": 68.37, "PolEmo2.0-OUT": 30.99 @@ -19796,7 +21626,7 @@ "Model": "st-polish-paraphrase-from-mpnet", "CDSC-R": 88.55, "SICK-R-PL": 76.18, - "STS22": 37.34 + "STS22 (pl)": 37.34 } ] }, @@ -19818,16 +21648,16 @@ "accuracy": [ { "Model": "sup-simcse-bert-base-uncased", - "AmazonCounterfactualClassification": 75.75, + "AmazonCounterfactualClassification (en)": 75.75, "AmazonPolarityClassification": 82.47, - "AmazonReviewsClassification": 39.6, + "AmazonReviewsClassification (en)": 39.6, "Banking77Classification": 75.76, "EmotionClassification": 44.81, "ImdbClassification": 73.53, - "MTOPDomainClassification": 84.29, - "MTOPIntentClassification": 63.14, - "MassiveIntentClassification": 65.95, - "MassiveScenarioClassification": 70.78, + "MTOPDomainClassification (en)": 84.29, + "MTOPIntentClassification (en)": 63.14, + "MassiveIntentClassification (en)": 65.95, + "MassiveScenarioClassification (en)": 70.78, "ToxicConversationsClassification": 72.04, "TweetSentimentExtractionClassification": 59.73 } @@ -19911,8 +21741,8 @@ "STS14": 80.19, "STS15": 85.4, "STS16": 80.82, - "STS17": 89.44, - "STS22": 61.96, + "STS17 (en-en)": 89.44, + "STS22 (en)": 61.96, "STSBenchmark": 84.25 } ] @@ -20018,16 +21848,16 @@ "accuracy": [ { "Model": "text-embedding-3-large", - "AmazonCounterfactualClassification": 78.93, + "AmazonCounterfactualClassification (en)": 78.93, "AmazonPolarityClassification": 92.85, - "AmazonReviewsClassification": 48.7, + "AmazonReviewsClassification (en)": 48.7, "Banking77Classification": 85.69, "EmotionClassification": 51.58, "ImdbClassification": 87.67, - "MTOPDomainClassification": 95.36, - "MTOPIntentClassification": 75.07, - "MassiveIntentClassification": 74.64, - "MassiveScenarioClassification": 79.79, + "MTOPDomainClassification (en)": 95.36, + "MTOPIntentClassification (en)": 75.07, + "MassiveIntentClassification (en)": 74.64, + "MassiveScenarioClassification (en)": 79.79, "ToxicConversationsClassification": 72.92, "TweetSentimentExtractionClassification": 62.22 } @@ -20157,8 +21987,8 @@ "STS14": 81.15, "STS15": 88.49, "STS16": 85.08, - "STS17": 90.22, - "STS22": 66.14, + "STS17 (en-en)": 90.22, + "STS22 (en)": 66.14, "STSBenchmark": 83.56 } ] @@ -20193,16 +22023,16 @@ "accuracy": [ { "Model": "text-embedding-3-large-256", - "AmazonCounterfactualClassification": 73.96, + "AmazonCounterfactualClassification (en)": 73.96, "AmazonPolarityClassification": 91.32, - "AmazonReviewsClassification": 46.03, + "AmazonReviewsClassification (en)": 46.03, "Banking77Classification": 83.19, "EmotionClassification": 45.8, "ImdbClassification": 85.93, - "MTOPDomainClassification": 92.76, - "MTOPIntentClassification": 70.45, - "MassiveIntentClassification": 71.12, - "MassiveScenarioClassification": 75.56, + "MTOPDomainClassification (en)": 92.76, + "MTOPIntentClassification (en)": 70.45, + "MassiveIntentClassification (en)": 71.12, + "MassiveScenarioClassification (en)": 75.56, "ToxicConversationsClassification": 68.52, "TweetSentimentExtractionClassification": 58.98 } @@ -20280,8 +22110,8 @@ "STS14": 80.5, "STS15": 87.51, "STS16": 84.48, - "STS17": 88.11, - "STS22": 65.92, + "STS17 (en-en)": 88.11, + "STS22 (en)": 65.92, "STSBenchmark": 82.34 } ] @@ -20360,16 +22190,16 @@ "accuracy": [ { "Model": "text-embedding-3-small", - "AmazonCounterfactualClassification": 76.42, + "AmazonCounterfactualClassification (en)": 76.42, "AmazonPolarityClassification": 90.84, - "AmazonReviewsClassification": 45.73, + "AmazonReviewsClassification (en)": 45.73, "Banking77Classification": 83.01, "EmotionClassification": 50.63, "ImdbClassification": 83.66, - "MTOPDomainClassification": 93.91, - "MTOPIntentClassification": 70.98, - "MassiveIntentClassification": 72.86, - "MassiveScenarioClassification": 76.84, + "MTOPDomainClassification (en)": 93.91, + "MTOPIntentClassification (en)": 70.98, + "MassiveIntentClassification (en)": 72.86, + "MassiveScenarioClassification (en)": 76.84, "ToxicConversationsClassification": 71.91, "TweetSentimentExtractionClassification": 61.72 } @@ -20397,14 +22227,14 @@ "max_ap": [ { "Model": "text-embedding-3-small", - "OpusparcusPC": 94.45, + "OpusparcusPC (fr)": 94.45, "SprintDuplicateQuestions": 94.58, "TwitterSemEval2015": 73.33, "TwitterURLCorpus": 87.21 }, { "Model": "text-embedding-3-small", - "OpusparcusPC": 94.45 + "OpusparcusPC (fr)": 94.45 } ] }, @@ -20467,8 +22297,8 @@ "STS14": 79.81, "STS15": 88.01, "STS16": 84.41, - "STS17": 90.94, - "STS22": 64.96, + "STS17 (en-en)": 90.94, + "STS22 (en)": 64.96, "STSBenchmark": 84.24 } ] @@ -20547,19 +22377,27 @@ "accuracy": [ { "Model": "text-embedding-ada-002", - "AmazonCounterfactualClassification": 75.94, + "AmazonCounterfactualClassification (en)": 75.94, "AmazonPolarityClassification": 86.72, - "AmazonReviewsClassification": 43.76, + "AmazonReviewsClassification (zh)": 38.3, + "AmazonReviewsClassification (en)": 44.78, + "AmazonReviewsClassification (fr)": 43.76, "Banking77Classification": 80.66, "EmotionClassification": 48.74, "IFlyTek": 44.62, "ImdbClassification": 77.98, "JDReview": 74.6, - "MTOPDomainClassification": 89.38, - "MTOPIntentClassification": 64.45, - "MasakhaNEWSClassification": 81.52, - "MassiveIntentClassification": 65.42, - "MassiveScenarioClassification": 71.11, + "MTOPDomainClassification (en)": 92.13, + "MTOPDomainClassification (fr)": 89.38, + "MTOPIntentClassification (en)": 64.68, + "MTOPIntentClassification (fr)": 64.45, + "MasakhaNEWSClassification (fra)": 81.52, + "MassiveIntentClassification (zh-CN)": 64.81, + "MassiveIntentClassification (en)": 70.15, + "MassiveIntentClassification (fr)": 65.42, + "MassiveScenarioClassification (zh-CN)": 71.4, + "MassiveScenarioClassification (en)": 75.33, + "MassiveScenarioClassification (fr)": 71.11, "MultilingualSentiment": 67.99, "OnlineShopping": 88.94, "TNews": 45.77, @@ -20584,8 +22422,8 @@ "HALClusteringS2S": 26.18, "MLSUMClusteringP2P": 44.59, "MLSUMClusteringS2S": 41.67, - "MasakhaNEWSClusteringP2P": 68.35, - "MasakhaNEWSClusteringS2S": 48.58, + "MasakhaNEWSClusteringP2P (fra)": 68.35, + "MasakhaNEWSClusteringS2S (fra)": 48.58, "MedrxivClusteringP2P": 32.6, "MedrxivClusteringS2S": 30.8, "RedditClustering": 61.42, @@ -20604,8 +22442,8 @@ "Model": "text-embedding-ada-002", "Cmnli": 76.03, "Ocnli": 63.08, - "OpusparcusPC": 94.12, - "PawsXPairClassification": 60.16, + "OpusparcusPC (fr)": 94.12, + "PawsXPairClassification (fr)": 60.16, "SprintDuplicateQuestions": 92.17, "TwitterSemEval2015": 75.28, "TwitterURLCorpus": 87.22 @@ -20614,8 +22452,8 @@ "Model": "text-embedding-ada-002", "Cmnli": 76.04, "Ocnli": 63.08, - "OpusparcusPC": 94.16, - "PawsXPairClassification": 60.19, + "OpusparcusPC (fr)": 94.16, + "PawsXPairClassification (fr)": 60.19, "SprintDuplicateQuestions": 92.17, "TwitterSemEval2015": 75.28, "TwitterURLCorpus": 87.22 @@ -20661,7 +22499,7 @@ "MMarcoRetrieval": 69.86, "MSMARCO": 40.91, "MedicalRetrieval": 37.92, - "MintakaRetrieval": 29.94, + "MintakaRetrieval (fr)": 29.94, "NFCorpus": 36.97, "NQ": 51.58, "PIQA": 31.02, @@ -20684,7 +22522,7 @@ "Touche2020": 21.61, "VideoRetrieval": 43.85, "WinoGrande": 19.65, - "XPQARetrieval": 73.0 + "XPQARetrieval (fr)": 73.0 } ] }, @@ -20706,11 +22544,14 @@ "STS14": 76.09, "STS15": 86.12, "STS16": 85.96, - "STS17": 90.25, - "STS22": 81.09, + "STS17 (en-en)": 90.25, + "STS22 (zh)": 62.53, + "STS22 (en)": 68.12, + "STS22 (tr)": 64.5, + "STS22 (fr)": 81.09, "STSB": 70.61, "STSBenchmark": 83.17, - "STSBenchmarkMultilingualSTS": 77.55 + "STSBenchmarkMultilingualSTS (fr)": 77.55 } ] }, @@ -21022,16 +22863,16 @@ "accuracy": [ { "Model": "text-similarity-ada-001", - "AmazonCounterfactualClassification": 76.4, + "AmazonCounterfactualClassification (en)": 76.4, "AmazonPolarityClassification": 92.83, - "AmazonReviewsClassification": 47.45, + "AmazonReviewsClassification (en)": 47.45, "Banking77Classification": 68.04, "EmotionClassification": 50.33, "ImdbClassification": 89.38, - "MTOPDomainClassification": 89.89, - "MTOPIntentClassification": 64.8, - "MassiveIntentClassification": 65.17, - "MassiveScenarioClassification": 67.67, + "MTOPDomainClassification (en)": 89.89, + "MTOPIntentClassification (en)": 64.8, + "MassiveIntentClassification (en)": 65.17, + "MassiveScenarioClassification (en)": 67.67, "ToxicConversationsClassification": 70.0, "TweetSentimentExtractionClassification": 63.35 } @@ -21115,8 +22956,8 @@ "STS14": 74.74, "STS15": 84.28, "STS16": 82.06, - "STS17": 87.08, - "STS22": 64.71, + "STS17 (en-en)": 87.08, + "STS22 (en)": 64.71, "STSBenchmark": 83.78 } ] @@ -21333,11 +23174,11 @@ "accuracy": [ { "Model": "text2vec-base-chinese", - "AmazonReviewsClassification": 34.12, + "AmazonReviewsClassification (zh)": 34.12, "IFlyTek": 42.05, "JDReview": 82.14, - "MassiveIntentClassification": 63.98, - "MassiveScenarioClassification": 70.52, + "MassiveIntentClassification (zh-CN)": 63.98, + "MassiveScenarioClassification (zh-CN)": 70.52, "MultilingualSentiment": 60.98, "OnlineShopping": 85.69, "TNews": 43.01, @@ -21406,7 +23247,7 @@ "LCQMC": 70.16, "PAWSX": 17.21, "QBQTC": 24.62, - "STS22": 55.35, + "STS22 (zh)": 55.35, "STSB": 79.3 } ] @@ -21429,12 +23270,12 @@ "accuracy": [ { "Model": "text2vec-base-multilingual", - "AmazonReviewsClassification": 34.25, - "MTOPDomainClassification": 71.83, - "MTOPIntentClassification": 44.53, - "MasakhaNEWSClassification": 73.84, - "MassiveIntentClassification": 51.93, - "MassiveScenarioClassification": 58.31 + "AmazonReviewsClassification (fr)": 34.25, + "MTOPDomainClassification (fr)": 71.83, + "MTOPIntentClassification (fr)": 44.53, + "MasakhaNEWSClassification (fra)": 73.84, + "MassiveIntentClassification (fr)": 51.93, + "MassiveScenarioClassification (fr)": 58.31 } ] }, @@ -21447,8 +23288,8 @@ "HALClusteringS2S": 16.19, "MLSUMClusteringP2P": 36.19, "MLSUMClusteringS2S": 30.39, - "MasakhaNEWSClusteringP2P": 38.51, - "MasakhaNEWSClusteringS2S": 32.51 + "MasakhaNEWSClusteringP2P (fra)": 38.51, + "MasakhaNEWSClusteringS2S (fra)": 32.51 } ] }, @@ -21456,13 +23297,13 @@ "max_ap": [ { "Model": "text2vec-base-multilingual", - "OpusparcusPC": 92.04, - "PawsXPairClassification": 65.57 + "OpusparcusPC (fr)": 92.04, + "PawsXPairClassification (fr)": 65.57 }, { "Model": "text2vec-base-multilingual", - "OpusparcusPC": 92.04, - "PawsXPairClassification": 65.6 + "OpusparcusPC (fr)": 92.04, + "PawsXPairClassification (fr)": 65.6 } ] }, @@ -21481,9 +23322,9 @@ "Model": "text2vec-base-multilingual", "AlloprofRetrieval": 18.9, "BSARDRetrieval": 0.0, - "MintakaRetrieval": 14.81, + "MintakaRetrieval (fr)": 14.81, "SyntecRetrieval": 49.69, - "XPQARetrieval": 40.4 + "XPQARetrieval (fr)": 40.4 } ] }, @@ -21492,8 +23333,8 @@ { "Model": "text2vec-base-multilingual", "SICKFr": 77.25, - "STS22": 74.1, - "STSBenchmarkMultilingualSTS": 83.48 + "STS22 (fr)": 74.1, + "STSBenchmarkMultilingualSTS (fr)": 83.48 } ] }, @@ -21520,11 +23361,11 @@ "accuracy": [ { "Model": "text2vec-large-chinese", - "AmazonReviewsClassification": 33.77, + "AmazonReviewsClassification (zh)": 33.77, "IFlyTek": 41.54, "JDReview": 81.56, - "MassiveIntentClassification": 63.23, - "MassiveScenarioClassification": 68.45, + "MassiveIntentClassification (zh-CN)": 63.23, + "MassiveScenarioClassification (zh-CN)": 68.45, "MultilingualSentiment": 58.97, "OnlineShopping": 83.51, "TNews": 38.92, @@ -21593,7 +23434,7 @@ "LCQMC": 69.16, "PAWSX": 14.55, "QBQTC": 29.51, - "STS22": 65.94, + "STS22 (zh)": 65.94, "STSB": 79.45 } ] @@ -21616,7 +23457,7 @@ "accuracy": [ { "Model": "titan-embed-text-v1", - "AmazonCounterfactualClassification": 61.85, + "AmazonCounterfactualClassification (en)": 61.85, "Banking77Classification": 83.21 } ] @@ -21659,7 +23500,7 @@ "STS14": 73.71, "STS15": 82.4, "STS16": NaN, - "STS17": 80.9, + "STS17 (en-en)": 80.9, "STSBenchmark": 74.85 } ] @@ -21682,12 +23523,12 @@ "accuracy": [ { "Model": "udever-bloom-1b1", - "AmazonReviewsClassification": 35.12, - "MTOPDomainClassification": 69.24, - "MTOPIntentClassification": 51.25, - "MasakhaNEWSClassification": 80.83, - "MassiveIntentClassification": 43.21, - "MassiveScenarioClassification": 49.78 + "AmazonReviewsClassification (fr)": 35.12, + "MTOPDomainClassification (fr)": 69.24, + "MTOPIntentClassification (fr)": 51.25, + "MasakhaNEWSClassification (fra)": 80.83, + "MassiveIntentClassification (fr)": 43.21, + "MassiveScenarioClassification (fr)": 49.78 } ] }, @@ -21700,8 +23541,8 @@ "HALClusteringS2S": 13.86, "MLSUMClusteringP2P": 44.11, "MLSUMClusteringS2S": 30.47, - "MasakhaNEWSClusteringP2P": 40.2, - "MasakhaNEWSClusteringS2S": 27.35 + "MasakhaNEWSClusteringP2P (fra)": 40.2, + "MasakhaNEWSClusteringS2S (fra)": 27.35 } ] }, @@ -21709,13 +23550,13 @@ "max_ap": [ { "Model": "udever-bloom-1b1", - "OpusparcusPC": 85.54, - "PawsXPairClassification": 61.99 + "OpusparcusPC (fr)": 85.54, + "PawsXPairClassification (fr)": 61.99 }, { "Model": "udever-bloom-1b1", - "OpusparcusPC": 90.15, - "PawsXPairClassification": 63.95 + "OpusparcusPC (fr)": 90.15, + "PawsXPairClassification (fr)": 63.95 } ] }, @@ -21734,9 +23575,9 @@ "Model": "udever-bloom-1b1", "AlloprofRetrieval": 12.37, "BSARDRetrieval": 0.0, - "MintakaRetrieval": 2.78, + "MintakaRetrieval (fr)": 2.78, "SyntecRetrieval": 40.57, - "XPQARetrieval": 33.82 + "XPQARetrieval (fr)": 33.82 } ] }, @@ -21745,8 +23586,8 @@ { "Model": "udever-bloom-1b1", "SICKFr": 59.94, - "STS22": 77.1, - "STSBenchmarkMultilingualSTS": 49.97 + "STS22 (fr)": 77.1, + "STSBenchmarkMultilingualSTS (fr)": 49.97 } ] }, @@ -21773,12 +23614,12 @@ "accuracy": [ { "Model": "udever-bloom-560m", - "AmazonReviewsClassification": 26.85, - "MTOPDomainClassification": 34.99, - "MTOPIntentClassification": 15.76, - "MasakhaNEWSClassification": 67.94, - "MassiveIntentClassification": 15.09, - "MassiveScenarioClassification": 21.67 + "AmazonReviewsClassification (fr)": 26.85, + "MTOPDomainClassification (fr)": 34.99, + "MTOPIntentClassification (fr)": 15.76, + "MasakhaNEWSClassification (fra)": 67.94, + "MassiveIntentClassification (fr)": 15.09, + "MassiveScenarioClassification (fr)": 21.67 } ] }, @@ -21791,8 +23632,8 @@ "HALClusteringS2S": 7.68, "MLSUMClusteringP2P": 36.43, "MLSUMClusteringS2S": 25.26, - "MasakhaNEWSClusteringP2P": 37.57, - "MasakhaNEWSClusteringS2S": 20.58 + "MasakhaNEWSClusteringP2P (fra)": 37.57, + "MasakhaNEWSClusteringS2S (fra)": 20.58 } ] }, @@ -21800,13 +23641,13 @@ "max_ap": [ { "Model": "udever-bloom-560m", - "OpusparcusPC": 82.1, - "PawsXPairClassification": 59.69 + "OpusparcusPC (fr)": 82.1, + "PawsXPairClassification (fr)": 59.69 }, { "Model": "udever-bloom-560m", - "OpusparcusPC": 85.87, - "PawsXPairClassification": 61.99 + "OpusparcusPC (fr)": 85.87, + "PawsXPairClassification (fr)": 61.99 } ] }, @@ -21825,9 +23666,9 @@ "Model": "udever-bloom-560m", "AlloprofRetrieval": 1.98, "BSARDRetrieval": 0.0, - "MintakaRetrieval": 0.48, + "MintakaRetrieval (fr)": 0.48, "SyntecRetrieval": 24.45, - "XPQARetrieval": 12.98 + "XPQARetrieval (fr)": 12.98 } ] }, @@ -21836,8 +23677,8 @@ { "Model": "udever-bloom-560m", "SICKFr": 54.54, - "STS22": 61.35, - "STSBenchmarkMultilingualSTS": 36.78 + "STS22 (fr)": 61.35, + "STSBenchmarkMultilingualSTS (fr)": 36.78 } ] }, @@ -21864,12 +23705,12 @@ "accuracy": [ { "Model": "universal-sentence-encoder-multilingual-3", - "AmazonReviewsClassification": 33.51, - "MTOPDomainClassification": 85.5, - "MTOPIntentClassification": 53.98, - "MasakhaNEWSClassification": 82.06, - "MassiveIntentClassification": 61.19, - "MassiveScenarioClassification": 70.22 + "AmazonReviewsClassification (fr)": 33.51, + "MTOPDomainClassification (fr)": 85.5, + "MTOPIntentClassification (fr)": 53.98, + "MasakhaNEWSClassification (fra)": 82.06, + "MassiveIntentClassification (fr)": 61.19, + "MassiveScenarioClassification (fr)": 70.22 } ] }, @@ -21882,8 +23723,8 @@ "HALClusteringS2S": 18.95, "MLSUMClusteringP2P": 43.9, "MLSUMClusteringS2S": 35.5, - "MasakhaNEWSClusteringP2P": 60.57, - "MasakhaNEWSClusteringS2S": 40.31 + "MasakhaNEWSClusteringP2P (fra)": 60.57, + "MasakhaNEWSClusteringS2S (fra)": 40.31 } ] }, @@ -21891,13 +23732,13 @@ "max_ap": [ { "Model": "universal-sentence-encoder-multilingual-3", - "OpusparcusPC": 91.46, - "PawsXPairClassification": 52.39 + "OpusparcusPC (fr)": 91.46, + "PawsXPairClassification (fr)": 52.39 }, { "Model": "universal-sentence-encoder-multilingual-3", - "OpusparcusPC": 91.46, - "PawsXPairClassification": 52.41 + "OpusparcusPC (fr)": 91.46, + "PawsXPairClassification (fr)": 52.41 } ] }, @@ -21916,9 +23757,9 @@ "Model": "universal-sentence-encoder-multilingual-3", "AlloprofRetrieval": 35.27, "BSARDRetrieval": 0.0, - "MintakaRetrieval": 26.12, + "MintakaRetrieval (fr)": 26.12, "SyntecRetrieval": 69.82, - "XPQARetrieval": 59.59 + "XPQARetrieval (fr)": 59.59 } ] }, @@ -21927,8 +23768,8 @@ { "Model": "universal-sentence-encoder-multilingual-3", "SICKFr": 71.37, - "STS22": 77.91, - "STSBenchmarkMultilingualSTS": 75.48 + "STS22 (fr)": 77.91, + "STSBenchmarkMultilingualSTS (fr)": 75.48 } ] }, @@ -21955,12 +23796,12 @@ "accuracy": [ { "Model": "universal-sentence-encoder-multilingual-large-3", - "AmazonReviewsClassification": 35.09, - "MTOPDomainClassification": 88.19, - "MTOPIntentClassification": 63.64, - "MasakhaNEWSClassification": 72.04, - "MassiveIntentClassification": 65.8, - "MassiveScenarioClassification": 73.47 + "AmazonReviewsClassification (fr)": 35.09, + "MTOPDomainClassification (fr)": 88.19, + "MTOPIntentClassification (fr)": 63.64, + "MasakhaNEWSClassification (fra)": 72.04, + "MassiveIntentClassification (fr)": 65.8, + "MassiveScenarioClassification (fr)": 73.47 } ] }, @@ -21973,8 +23814,8 @@ "HALClusteringS2S": 18.94, "MLSUMClusteringP2P": 41.02, "MLSUMClusteringS2S": 37.97, - "MasakhaNEWSClusteringP2P": 24.09, - "MasakhaNEWSClusteringS2S": 40.24 + "MasakhaNEWSClusteringP2P (fra)": 24.09, + "MasakhaNEWSClusteringS2S (fra)": 40.24 } ] }, @@ -21982,13 +23823,13 @@ "max_ap": [ { "Model": "universal-sentence-encoder-multilingual-large-3", - "OpusparcusPC": 93.38, - "PawsXPairClassification": 53.62 + "OpusparcusPC (fr)": 93.38, + "PawsXPairClassification (fr)": 53.62 }, { "Model": "universal-sentence-encoder-multilingual-large-3", - "OpusparcusPC": 93.38, - "PawsXPairClassification": 53.66 + "OpusparcusPC (fr)": 93.38, + "PawsXPairClassification (fr)": 53.66 } ] }, @@ -22007,9 +23848,9 @@ "Model": "universal-sentence-encoder-multilingual-large-3", "AlloprofRetrieval": 33.78, "BSARDRetrieval": 0.0, - "MintakaRetrieval": 26.21, + "MintakaRetrieval (fr)": 26.21, "SyntecRetrieval": 63.69, - "XPQARetrieval": 65.21 + "XPQARetrieval (fr)": 65.21 } ] }, @@ -22018,8 +23859,8 @@ { "Model": "universal-sentence-encoder-multilingual-large-3", "SICKFr": 74.39, - "STS22": 71.11, - "STSBenchmarkMultilingualSTS": 78.16 + "STS22 (fr)": 71.11, + "STSBenchmarkMultilingualSTS (fr)": 78.16 } ] }, @@ -22046,16 +23887,16 @@ "accuracy": [ { "Model": "unsup-simcse-bert-base-uncased", - "AmazonCounterfactualClassification": 67.09, + "AmazonCounterfactualClassification (en)": 67.09, "AmazonPolarityClassification": 74.48, - "AmazonReviewsClassification": 33.85, + "AmazonReviewsClassification (en)": 33.85, "Banking77Classification": 73.55, "EmotionClassification": 42.22, "ImdbClassification": 69.63, - "MTOPDomainClassification": 81.71, - "MTOPIntentClassification": 59.23, - "MassiveIntentClassification": 59.84, - "MassiveScenarioClassification": 66.25, + "MTOPDomainClassification (en)": 81.71, + "MTOPIntentClassification (en)": 59.23, + "MassiveIntentClassification (en)": 59.84, + "MassiveScenarioClassification (en)": 66.25, "ToxicConversationsClassification": 68.82, "TweetSentimentExtractionClassification": 53.36 } @@ -22139,8 +23980,8 @@ "STS14": 73.61, "STS15": 79.72, "STS16": 78.12, - "STS17": 83.58, - "STS22": 59.65, + "STS17 (en-en)": 83.58, + "STS22 (en)": 59.65, "STSBenchmark": 76.52 } ] @@ -22208,12 +24049,12 @@ "accuracy": [ { "Model": "voyage-2", - "AmazonReviewsClassification": 37.26, - "MTOPDomainClassification": 79.79, - "MTOPIntentClassification": 45.62, - "MasakhaNEWSClassification": 80.19, - "MassiveIntentClassification": 53.7, - "MassiveScenarioClassification": 62.46 + "AmazonReviewsClassification (fr)": 37.26, + "MTOPDomainClassification (fr)": 79.79, + "MTOPIntentClassification (fr)": 45.62, + "MasakhaNEWSClassification (fra)": 80.19, + "MassiveIntentClassification (fr)": 53.7, + "MassiveScenarioClassification (fr)": 62.46 } ] }, @@ -22226,8 +24067,8 @@ "HALClusteringS2S": 24.84, "MLSUMClusteringP2P": 45.08, "MLSUMClusteringS2S": 38.77, - "MasakhaNEWSClusteringP2P": 48.54, - "MasakhaNEWSClusteringS2S": 36.33 + "MasakhaNEWSClusteringP2P (fra)": 48.54, + "MasakhaNEWSClusteringS2S (fra)": 36.33 } ] }, @@ -22235,13 +24076,13 @@ "max_ap": [ { "Model": "voyage-2", - "OpusparcusPC": 89.76, - "PawsXPairClassification": 58.96 + "OpusparcusPC (fr)": 89.76, + "PawsXPairClassification (fr)": 58.96 }, { "Model": "voyage-2", - "OpusparcusPC": 89.83, - "PawsXPairClassification": 58.97 + "OpusparcusPC (fr)": 89.83, + "PawsXPairClassification (fr)": 58.97 } ] }, @@ -22260,9 +24101,9 @@ "Model": "voyage-2", "AlloprofRetrieval": 45.5, "BSARDRetrieval": 0.15, - "MintakaRetrieval": 15.51, + "MintakaRetrieval (fr)": 15.51, "SyntecRetrieval": 75.83, - "XPQARetrieval": 67.07 + "XPQARetrieval (fr)": 67.07 } ] }, @@ -22271,8 +24112,8 @@ { "Model": "voyage-2", "SICKFr": 68.51, - "STS22": 70.51, - "STSBenchmarkMultilingualSTS": 76.43 + "STS22 (fr)": 70.51, + "STSBenchmarkMultilingualSTS (fr)": 76.43 } ] }, @@ -22299,12 +24140,12 @@ "accuracy": [ { "Model": "voyage-code-2", - "AmazonReviewsClassification": 42.15, - "MTOPDomainClassification": 87.68, - "MTOPIntentClassification": 59.44, - "MasakhaNEWSClassification": 82.13, - "MassiveIntentClassification": 63.08, - "MassiveScenarioClassification": 70.15 + "AmazonReviewsClassification (fr)": 42.15, + "MTOPDomainClassification (fr)": 87.68, + "MTOPIntentClassification (fr)": 59.44, + "MasakhaNEWSClassification (fra)": 82.13, + "MassiveIntentClassification (fr)": 63.08, + "MassiveScenarioClassification (fr)": 70.15 } ] }, @@ -22317,8 +24158,8 @@ "HALClusteringS2S": 27.44, "MLSUMClusteringP2P": 45.23, "MLSUMClusteringS2S": 41.48, - "MasakhaNEWSClusteringP2P": 56.59, - "MasakhaNEWSClusteringS2S": 35.18 + "MasakhaNEWSClusteringP2P (fra)": 56.59, + "MasakhaNEWSClusteringS2S (fra)": 35.18 } ] }, @@ -22326,13 +24167,13 @@ "max_ap": [ { "Model": "voyage-code-2", - "OpusparcusPC": 92.87, - "PawsXPairClassification": 60.83 + "OpusparcusPC (fr)": 92.87, + "PawsXPairClassification (fr)": 60.83 }, { "Model": "voyage-code-2", - "OpusparcusPC": 92.87, - "PawsXPairClassification": 60.88 + "OpusparcusPC (fr)": 92.87, + "PawsXPairClassification (fr)": 60.88 } ] }, @@ -22351,9 +24192,9 @@ "Model": "voyage-code-2", "AlloprofRetrieval": 52.61, "BSARDRetrieval": 0.29, - "MintakaRetrieval": 19.05, + "MintakaRetrieval (fr)": 19.05, "SyntecRetrieval": 82.77, - "XPQARetrieval": 71.95 + "XPQARetrieval (fr)": 71.95 } ] }, @@ -22362,8 +24203,8 @@ { "Model": "voyage-code-2", "SICKFr": 73.56, - "STS22": 79.99, - "STSBenchmarkMultilingualSTS": 79.02 + "STS22 (fr)": 79.99, + "STSBenchmarkMultilingualSTS (fr)": 79.02 } ] }, @@ -22390,16 +24231,16 @@ "accuracy": [ { "Model": "voyage-large-2-instruct", - "AmazonCounterfactualClassification": 77.6, + "AmazonCounterfactualClassification (en)": 77.6, "AmazonPolarityClassification": 96.58, - "AmazonReviewsClassification": 50.77, + "AmazonReviewsClassification (en)": 50.77, "Banking77Classification": 86.96, "EmotionClassification": 59.81, "ImdbClassification": 96.13, - "MTOPDomainClassification": 98.86, - "MTOPIntentClassification": 86.97, - "MassiveIntentClassification": 81.08, - "MassiveScenarioClassification": 87.95, + "MTOPDomainClassification (en)": 98.86, + "MTOPIntentClassification (en)": 86.97, + "MassiveIntentClassification (en)": 81.08, + "MassiveScenarioClassification (en)": 87.95, "ToxicConversationsClassification": 83.58, "TweetSentimentExtractionClassification": 71.55 } @@ -22508,8 +24349,8 @@ "STS14": 86.49, "STS15": 91.13, "STS16": 85.68, - "STS17": 90.06, - "STS22": 66.32, + "STS17 (en-en)": 90.06, + "STS22 (en)": 66.32, "STSBenchmark": 89.22 } ] @@ -22537,12 +24378,12 @@ "accuracy": [ { "Model": "voyage-law-2", - "AmazonReviewsClassification": 41.98, - "MTOPDomainClassification": 90.12, - "MTOPIntentClassification": 62.44, - "MasakhaNEWSClassification": 76.42, - "MassiveIntentClassification": 66.94, - "MassiveScenarioClassification": 72.78 + "AmazonReviewsClassification (fr)": 41.98, + "MTOPDomainClassification (fr)": 90.12, + "MTOPIntentClassification (fr)": 62.44, + "MasakhaNEWSClassification (fra)": 76.42, + "MassiveIntentClassification (fr)": 66.94, + "MassiveScenarioClassification (fr)": 72.78 } ] }, @@ -22553,10 +24394,10 @@ "AlloProfClusteringP2P": 62.5, "AlloProfClusteringS2S": 44.28, "HALClusteringS2S": 26.36, - "MLSUMClusteringP2P": 44.03, - "MLSUMClusteringS2S": 42.95, - "MasakhaNEWSClusteringP2P": 50.68, - "MasakhaNEWSClusteringS2S": 38.79 + "MLSUMClusteringP2P (fr)": 44.03, + "MLSUMClusteringS2S (fr)": 42.95, + "MasakhaNEWSClusteringP2P (fra)": 50.68, + "MasakhaNEWSClusteringS2S (fra)": 38.79 } ] }, @@ -22564,13 +24405,13 @@ "max_ap": [ { "Model": "voyage-law-2", - "OpusparcusPC": 93.06, - "PawsXPairClassification": 61.54 + "OpusparcusPC (fr)": 93.06, + "PawsXPairClassification (fr)": 61.54 }, { "Model": "voyage-law-2", - "OpusparcusPC": 93.06, - "PawsXPairClassification": 61.54 + "OpusparcusPC (fr)": 93.06, + "PawsXPairClassification (fr)": 61.54 } ] }, @@ -22601,9 +24442,9 @@ "LegalBenchCorporateLobbying": 95.66, "LegalQuAD": 67.47, "LegalSummarization": 68.96, - "MintakaRetrieval": 34.92, + "MintakaRetrieval (fr)": 34.92, "SyntecRetrieval": 87.33, - "XPQARetrieval": 73.56 + "XPQARetrieval (fr)": 73.56 } ] }, @@ -22612,8 +24453,8 @@ { "Model": "voyage-law-2", "SICKFr": 74.09, - "STS22": 83.75, - "STSBenchmarkMultilingualSTS": 83.02 + "STS22 (fr)": 83.75, + "STSBenchmarkMultilingualSTS (fr)": 83.02 } ] }, @@ -22640,16 +24481,16 @@ "accuracy": [ { "Model": "voyage-lite-01-instruct", - "AmazonCounterfactualClassification": 71.43, + "AmazonCounterfactualClassification (en)": 71.43, "AmazonPolarityClassification": 96.41, - "AmazonReviewsClassification": 57.06, + "AmazonReviewsClassification (en)": 57.06, "Banking77Classification": 81.64, "EmotionClassification": 48.29, "ImdbClassification": 95.49, - "MTOPDomainClassification": 96.3, - "MTOPIntentClassification": 67.93, - "MassiveIntentClassification": 71.29, - "MassiveScenarioClassification": 76.74, + "MTOPDomainClassification (en)": 96.3, + "MTOPIntentClassification (en)": 67.93, + "MassiveIntentClassification (en)": 71.29, + "MassiveScenarioClassification (en)": 76.74, "ToxicConversationsClassification": 75.45, "TweetSentimentExtractionClassification": 59.44 } @@ -22733,8 +24574,8 @@ "STS14": 82.08, "STS15": 89.21, "STS16": 84.74, - "STS17": 90.73, - "STS22": 62.1, + "STS17 (en-en)": 90.73, + "STS22 (en)": 62.1, "STSBenchmark": 89.86 } ] @@ -22762,16 +24603,16 @@ "accuracy": [ { "Model": "voyage-lite-02-instruct", - "AmazonCounterfactualClassification": 88.31, + "AmazonCounterfactualClassification (en)": 88.31, "AmazonPolarityClassification": 96.32, - "AmazonReviewsClassification": 56.25, + "AmazonReviewsClassification (en)": 56.25, "Banking77Classification": 88.59, "EmotionClassification": 50.28, "ImdbClassification": 95.75, - "MTOPDomainClassification": 97.65, - "MTOPIntentClassification": 75.16, - "MassiveIntentClassification": 73.97, - "MassiveScenarioClassification": 83.99, + "MTOPDomainClassification (en)": 97.65, + "MTOPIntentClassification (en)": 75.16, + "MassiveIntentClassification (en)": 73.97, + "MassiveScenarioClassification (en)": 83.99, "ToxicConversationsClassification": 81.75, "TweetSentimentExtractionClassification": 62.98 } @@ -22855,8 +24696,8 @@ "STS14": 86.6, "STS15": 90.1, "STS16": 86.39, - "STS17": 86.98, - "STS22": 76.89, + "STS17 (en-en)": 86.98, + "STS22 (en)": 76.89, "STSBenchmark": 88.56 } ] @@ -22884,12 +24725,12 @@ "accuracy": [ { "Model": "voyage-multilingual-2", - "AmazonReviewsClassification": 43.36, - "MTOPDomainClassification": 90.33, - "MTOPIntentClassification": 60.52, - "MasakhaNEWSClassification": 74.81, - "MassiveIntentClassification": 68.06, - "MassiveScenarioClassification": 74.29 + "AmazonReviewsClassification (fr)": 43.36, + "MTOPDomainClassification (fr)": 90.33, + "MTOPIntentClassification (fr)": 60.52, + "MasakhaNEWSClassification (fra)": 74.81, + "MassiveIntentClassification (fr)": 68.06, + "MassiveScenarioClassification (fr)": 74.29 } ] }, @@ -22900,10 +24741,10 @@ "AlloProfClusteringP2P": 65.37, "AlloProfClusteringS2S": 47.03, "HALClusteringS2S": 27.67, - "MLSUMClusteringP2P": 45.99, - "MLSUMClusteringS2S": 45.57, - "MasakhaNEWSClusteringP2P": 44.53, - "MasakhaNEWSClusteringS2S": 49.8 + "MLSUMClusteringP2P (fr)": 45.99, + "MLSUMClusteringS2S (fr)": 45.57, + "MasakhaNEWSClusteringP2P (fra)": 44.53, + "MasakhaNEWSClusteringS2S (fra)": 49.8 } ] }, @@ -22911,13 +24752,13 @@ "max_ap": [ { "Model": "voyage-multilingual-2", - "OpusparcusPC": 93.68, - "PawsXPairClassification": 63.64 + "OpusparcusPC (fr)": 93.68, + "PawsXPairClassification (fr)": 63.64 }, { "Model": "voyage-multilingual-2", - "OpusparcusPC": 93.68, - "PawsXPairClassification": 63.71 + "OpusparcusPC (fr)": 93.68, + "PawsXPairClassification (fr)": 63.71 } ] }, @@ -22940,9 +24781,9 @@ "LEMBQMSumRetrieval": 51.49, "LEMBSummScreenFDRetrieval": 99.11, "LEMBWikimQARetrieval": 87.49, - "MintakaRetrieval": 49.19, + "MintakaRetrieval (fr)": 49.19, "SyntecRetrieval": 87.28, - "XPQARetrieval": 72.92 + "XPQARetrieval (fr)": 72.92 } ] }, @@ -22951,8 +24792,8 @@ { "Model": "voyage-multilingual-2", "SICKFr": 74.9, - "STS22": 82.76, - "STSBenchmarkMultilingualSTS": 82.72 + "STS22 (fr)": 82.76, + "STSBenchmarkMultilingualSTS (fr)": 82.72 } ] }, @@ -22984,16 +24825,22 @@ "accuracy": [ { "Model": "xlm-roberta-base", - "AmazonReviewsClassification": 26.75, + "AmazonReviewsClassification (fr)": 26.75, "AngryTweetsClassification": 52.41, "DKHateClassification": 56.78, "DanishPoliticalCommentsClassification": 34.03, "LccSentimentClassification": 52.27, - "MTOPDomainClassification": 43.83, - "MTOPIntentClassification": 19.38, - "MasakhaNEWSClassification": 60.5, - "MassiveIntentClassification": 13.58, - "MassiveScenarioClassification": 23.21, + "MTOPDomainClassification (fr)": 43.83, + "MTOPIntentClassification (fr)": 19.38, + "MasakhaNEWSClassification (fra)": 60.5, + "MassiveIntentClassification (da)": 41.06, + "MassiveIntentClassification (nb)": 40.46, + "MassiveIntentClassification (sv)": 45.12, + "MassiveIntentClassification (fr)": 13.58, + "MassiveScenarioClassification (da)": 43.91, + "MassiveScenarioClassification (nb)": 44.83, + "MassiveScenarioClassification (sv)": 47.35, + "MassiveScenarioClassification (fr)": 23.21, "NoRecClassification": 46.28, "NordicLangClassification": 79.39, "NorwegianParliament": 56.75, @@ -23011,8 +24858,8 @@ "HALClusteringS2S": 8.68, "MLSUMClusteringP2P": 40.44, "MLSUMClusteringS2S": 24.14, - "MasakhaNEWSClusteringP2P": 29.29, - "MasakhaNEWSClusteringS2S": 23.76 + "MasakhaNEWSClusteringP2P (fra)": 29.29, + "MasakhaNEWSClusteringS2S (fra)": 23.76 } ] }, @@ -23020,13 +24867,13 @@ "max_ap": [ { "Model": "xlm-roberta-base", - "OpusparcusPC": 85.45, - "PawsXPairClassification": 51.35 + "OpusparcusPC (fr)": 85.45, + "PawsXPairClassification (fr)": 51.35 }, { "Model": "xlm-roberta-base", - "OpusparcusPC": 85.91, - "PawsXPairClassification": 51.73 + "OpusparcusPC (fr)": 85.91, + "PawsXPairClassification (fr)": 51.73 } ] }, @@ -23045,9 +24892,9 @@ "Model": "xlm-roberta-base", "AlloprofRetrieval": 0.16, "BSARDRetrieval": 0.0, - "MintakaRetrieval": 0.88, + "MintakaRetrieval (fr)": 0.88, "SyntecRetrieval": 3.33, - "XPQARetrieval": 11.65 + "XPQARetrieval (fr)": 11.65 } ] }, @@ -23056,8 +24903,8 @@ { "Model": "xlm-roberta-base", "SICKFr": 48.62, - "STS22": 56.72, - "STSBenchmarkMultilingualSTS": 46.23 + "STS22 (fr)": 56.72, + "STSBenchmarkMultilingualSTS (fr)": 46.23 } ] }, @@ -23084,12 +24931,12 @@ "accuracy": [ { "Model": "xlm-roberta-large", - "AmazonReviewsClassification": 26.62, - "MTOPDomainClassification": 36.77, - "MTOPIntentClassification": 15.37, - "MasakhaNEWSClassification": 65.76, - "MassiveIntentClassification": 15.82, - "MassiveScenarioClassification": 23.92 + "AmazonReviewsClassification (fr)": 26.62, + "MTOPDomainClassification (fr)": 36.77, + "MTOPIntentClassification (fr)": 15.37, + "MasakhaNEWSClassification (fra)": 65.76, + "MassiveIntentClassification (fr)": 15.82, + "MassiveScenarioClassification (fr)": 23.92 } ] }, @@ -23104,8 +24951,8 @@ "HALClusteringS2S": 5.94, "MLSUMClusteringP2P": 42.67, "MLSUMClusteringS2S": 18.5, - "MasakhaNEWSClusteringP2P": 34.02, - "MasakhaNEWSClusteringS2S": 21.52, + "MasakhaNEWSClusteringP2P (fra)": 34.02, + "MasakhaNEWSClusteringS2S (fra)": 21.52, "TenKGnadClusteringP2P": 32.46, "TenKGnadClusteringS2S": 6.16 } @@ -23115,13 +24962,13 @@ "max_ap": [ { "Model": "xlm-roberta-large", - "OpusparcusPC": 83.73, - "PawsXPairClassification": 53.38 + "OpusparcusPC (fr)": 83.73, + "PawsXPairClassification (fr)": 53.38 }, { "Model": "xlm-roberta-large", - "OpusparcusPC": 83.79, - "PawsXPairClassification": 53.44 + "OpusparcusPC (fr)": 83.79, + "PawsXPairClassification (fr)": 53.44 } ] }, @@ -23140,9 +24987,9 @@ "Model": "xlm-roberta-large", "AlloprofRetrieval": 0.52, "BSARDRetrieval": 0.0, - "MintakaRetrieval": 0.9, + "MintakaRetrieval (fr)": 0.9, "SyntecRetrieval": 6.6, - "XPQARetrieval": 12.7 + "XPQARetrieval (fr)": 12.7 } ] }, @@ -23151,8 +24998,8 @@ { "Model": "xlm-roberta-large", "SICKFr": 50.01, - "STS22": 55.49, - "STSBenchmarkMultilingualSTS": 42.32 + "STS22 (fr)": 55.49, + "STSBenchmarkMultilingualSTS (fr)": 42.32 } ] },