Spaces:
Runtime error
Runtime error
Commit
•
9b3b05a
1
Parent(s):
f6a1b1b
initial commit
Browse files
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
results
|
README.md
CHANGED
@@ -3,10 +3,10 @@ title: Test
|
|
3 |
emoji: 🔥
|
4 |
colorFrom: blue
|
5 |
colorTo: green
|
6 |
-
sdk:
|
7 |
-
sdk_version:
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
---
|
11 |
|
12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
3 |
emoji: 🔥
|
4 |
colorFrom: blue
|
5 |
colorTo: green
|
6 |
+
sdk: streamlit
|
7 |
+
sdk_version: 1.10.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
---
|
11 |
|
12 |
+
Check out the configuration reference at <https://huggingface.co/docs/hub/spaces-config-reference>
|
app.py
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import pandas as pd
|
3 |
+
import requests
|
4 |
+
import streamlit as st
|
5 |
+
from tasks import TASKS
|
6 |
+
from huggingface_hub import HfApi
|
7 |
+
import datasets
|
8 |
+
import os
|
9 |
+
|
10 |
+
# Display names for the task columns; currently just an alias of the raw task identifiers in TASKS.
FORMATTED_TASK_NAMES = TASKS
|
11 |
+
# Module-level Hugging Face Hub client, used below to list the submission datasets.
api = HfApi()
|
12 |
+
|
13 |
+
def download_submissions():
    """Return the Hub datasets that carry the ``benchmark:mteb`` filter tag.

    The listing request is authenticated with the value of the
    ``HF_HUB_TOKEN`` environment variable (``None`` when it is not set).
    """
    hub_token = os.getenv("HF_HUB_TOKEN")
    return api.list_datasets(filter="benchmark:mteb", use_auth_token=hub_token)
|
18 |
+
|
19 |
+
def format_submissions(submissions):
    """Build the leaderboard table from the downloaded submissions.

    Every submission is loaded as a streaming dataset from its ``*csv``
    files; each CSV row is read through the keys ``model``, ``dataset`` and
    ``value``. A new leaderboard row is started whenever the ``model`` value
    changes between consecutive CSV rows (and always at the start of a
    submission), so one submission may contribute several models.

    Results are collected as one record per model rather than as independent
    per-column lists. A model that is missing a task therefore gets ``NaN``
    in that column instead of producing ragged columns that make the
    ``DataFrame`` constructor fail. If a task is reported more than once for
    the same model, the last value wins.

    Args:
        submissions: iterable of objects exposing ``id``, ``author`` and
            ``lastModified``.

    Returns:
        A ``DataFrame`` with the columns ``Rank``, ``Submitter``,
        ``Submission Name``, ``Submission Date``, ``Overall`` followed by one
        column per task, sorted by ``Overall`` in descending order.
        ``Overall`` is the row-wise mean of the task columns (missing tasks
        are ignored by the mean).
    """
    # Only real task names may become score columns; the metadata column
    # names must never be matched by a ``dataset`` value.
    task_names = set(TASKS)
    rows = []
    for submission in submissions:
        submission_id = submission.id
        try:
            data = list(datasets.load_dataset(submission_id, streaming=True, data_files="*csv").values())[0]
        except FileNotFoundError:
            # Submission without any CSV result file: skip it.
            print("FileNotFoundError")
            continue

        row = None  # record of the model currently being read
        for line in data:
            if row is None or line["model"] != row["Submission Name"]:
                row = {
                    "Submitter": submission.author,
                    "Submission Name": line["model"],
                    "Submission Date": pd.to_datetime(submission.lastModified).strftime("%b %d, %Y"),
                }
                rows.append(row)
            if line["dataset"] in task_names:
                row[line["dataset"]] = line["value"]

    # Passing ``columns`` fixes the column order and fills absent tasks with NaN.
    columns = ["Submitter", "Submission Name", "Submission Date", *TASKS]
    df = pd.DataFrame(rows, columns=columns)
    df.insert(3, "Overall", df[TASKS].mean(axis=1))
    df = df.sort_values("Overall", ascending=False)
    df = df.rename(columns=dict(zip(TASKS, FORMATTED_TASK_NAMES)))
    # Start ranking from 1
    df.insert(0, "Rank", np.arange(1, len(df) + 1))
    return df
|
46 |
+
|
47 |
+
|
48 |
+
###########
|
49 |
+
### APP ###
|
50 |
+
###########
|
51 |
+
# Page setup: wide layout, title and the introductory text.
st.set_page_config(layout="wide")
st.title("MTEB: Massive Text Embedding Benchmark")
st.markdown(
    """
...
"""
)

# Fetch the submissions from the Hub and turn them into the leaderboard table.
submissions = download_submissions()
df = format_submissions(submissions)

# ``Styler.set_precision`` was deprecated in pandas 1.3 and later removed;
# ``Styler.format(precision=...)`` is the documented replacement and renders
# floats with the same number of decimals.
styler = df.style.format(precision=3).set_properties(**{"white-space": "pre-wrap", "text-align": "center"})

# hack to remove index column: https://discuss.streamlit.io/t/questions-on-st-table/6878/3
st.markdown(
    """
<style>
table td:nth-child(1) {
display: none
}
table th:nth-child(1) {
display: none
}
</style>
""",
    unsafe_allow_html=True,
)
st.table(styler)
|
tasks.py
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Task names grouped by task category. The order inside each list, and the
# order in which the lists are combined into TASKS, defines the order of the
# task columns wherever TASKS is used.

TASK_LIST_CLASSIFICATION = [
    "AmazonCounterfactualClassification",
    "AmazonPolarityClassification",
    "AmazonReviewsClassification",
    "Banking77Classification",
    "EmotionClassification",
    "ImdbClassification",
    "MassiveIntentClassification",
    "MassiveScenarioClassification",
    "MTOPDomainClassification",
    "MTOPIntentClassification",
    "ToxicConversationsClassification",
    "TweetSentimentExtractionClassification",
]

TASK_LIST_CLUSTERING = [
    "ArxivClusteringP2P",
    "ArxivClusteringS2S",
    "BiorxivClusteringP2P",
    "BiorxivClusteringS2S",
    "MedrxivClusteringP2P",
    "MedrxivClusteringS2S",
    "RedditClustering",
    "RedditClusteringP2P",
    "StackExchangeClustering",
    "StackExchangeClusteringP2P",
    "TwentyNewsgroupsClustering",
]

TASK_LIST_PAIR_CLASSIFICATION = [
    "SprintDuplicateQuestions",
    "TwitterSemEval2015",
    "TwitterURLCorpus",
]

TASK_LIST_RERANKING = [
    "AskUbuntuDupQuestions",
    "MindSmallReranking",
    "SciDocsRR",
    "StackOverflowDupQuestions",
]

TASK_LIST_RETRIEVAL = [
    "ArguAna",
    "ClimateFEVER",
    "CQADupstackRetrieval",
    "DBPedia",
    "FEVER",
    "FiQA2018",
    "HotpotQA",
    "MSMARCO",
    "NFCorpus",
    "NQ",
    "QuoraRetrieval",
    "SCIDOCS",
    "SciFact",
    "Touche2020",
    "TRECCOVID",
]

# NOTE(review): "SummEval" is grouped with the STS tasks here -- confirm that
# this grouping is intentional.
TASK_LIST_STS = [
    "BIOSSES",
    "SICK-R",
    "STS12",
    "STS13",
    "STS14",
    "STS15",
    "STS16",
    "STS17",
    "STS22",
    "STSBenchmark",
    "SummEval",
]

# Flat list of every task, category by category.
TASKS = [
    *TASK_LIST_CLASSIFICATION,
    *TASK_LIST_CLUSTERING,
    *TASK_LIST_PAIR_CLASSIFICATION,
    *TASK_LIST_RERANKING,
    *TASK_LIST_RETRIEVAL,
    *TASK_LIST_STS,
]
|