nouamanetazi HF staff committed on
Commit
9b3b05a
1 Parent(s): f6a1b1b

initial commit

Browse files
Files changed (4) hide show
  1. .gitignore +1 -0
  2. README.md +3 -3
  3. app.py +75 -0
  4. tasks.py +82 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ results
README.md CHANGED
@@ -3,10 +3,10 @@ title: Test
3
  emoji: 🔥
4
  colorFrom: blue
5
  colorTo: green
6
- sdk: gradio
7
- sdk_version: 3.2
8
  app_file: app.py
9
  pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
3
  emoji: 🔥
4
  colorFrom: blue
5
  colorTo: green
6
+ sdk: streamlit
7
+ sdk_version: 1.10.0
8
  app_file: app.py
9
  pinned: false
10
  ---
11
 
12
+ Check out the configuration reference at <https://huggingface.co/docs/hub/spaces-config-reference>
app.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+ import requests
4
+ import streamlit as st
5
+ from tasks import TASKS
6
+ from huggingface_hub import HfApi
7
+ import datasets
8
+ import os
9
+
10
# Display names for the leaderboard columns. Currently identical to TASKS,
# so the later df.rename(...) in format_submissions is a no-op placeholder
# for prettier labels.
FORMATTED_TASK_NAMES = TASKS
# Shared Hub API client, used by download_submissions().
api = HfApi()
12
+
13
def download_submissions():
    """Return all Hub datasets tagged with the ``benchmark:mteb`` filter.

    Reads ``HF_HUB_TOKEN`` from the environment so private submissions are
    visible as well; the token may be absent (None) for anonymous access.
    """
    return api.list_datasets(
        filter="benchmark:mteb",
        use_auth_token=os.getenv("HF_HUB_TOKEN"),
    )
18
+
19
def format_submissions(submissions):
    """Flatten the submitted result datasets into a ranked leaderboard frame.

    Parameters
    ----------
    submissions : iterable
        Dataset-info objects as returned by ``download_submissions()``; each
        must expose ``.id``, ``.author`` and ``.lastModified``.

    Returns
    -------
    pandas.DataFrame
        One row per submitted model with per-task scores, an "Overall" mean
        column, and a 1-based "Rank" column, sorted best-first.
    """
    # Metadata columns first, then one empty score column per benchmark task.
    submission_data = {
        "Submitter": [],
        "Submission Name": [],
        "Submission Date": [],
        **{t: [] for t in TASKS},
    }
    # The following picks the latest submissions which adhere to the model card schema
    for submission in submissions:
        submission_id = submission.id
        try:
            # Stream the first split of the CSV files in the submission repo.
            # NOTE(review): the glob is "*csv" (no dot) — presumably intended
            # as "*.csv"; confirm against the submission repo layout.
            data = list(datasets.load_dataset(submission_id, streaming=True, data_files="*csv").values())[0]
        except FileNotFoundError:
            # Was an uninformative bare print("FileNotFoundError"); name the repo.
            print(f"No CSV result files found for {submission_id}; skipping")
            continue
        for line in data:
            # Each CSV row is assumed to carry 'dataset', 'value' and 'model'
            # fields — TODO confirm against the submission schema.
            if line['dataset'] in submission_data:
                submission_data[line['dataset']].append(line['value'])
            # Register a model the first time its name appears in the stream.
            if len(submission_data['Submission Name']) == 0 or line['model'] != submission_data['Submission Name'][-1]:
                submission_data['Submission Name'].append(line['model'])
            # Keep the metadata columns length-aligned with the name column so
            # the DataFrame constructor below sees equal-length lists.
            while len(submission_data['Submission Name']) > len(submission_data['Submitter']):
                submission_data['Submitter'].append(submission.author)
                submission_data["Submission Date"].append(pd.to_datetime(submission.lastModified).strftime("%b %d, %Y"))

    df = pd.DataFrame(submission_data)
    # Overall score = unweighted mean across every task column.
    df.insert(3, "Overall", df[TASKS].mean(axis=1))
    # sort_values already returns a new frame, so no defensive copy is needed.
    df = df.sort_values("Overall", ascending=False)
    df.rename(columns=dict(zip(TASKS, FORMATTED_TASK_NAMES)), inplace=True)
    # Start ranking from 1
    df.insert(0, "Rank", np.arange(1, len(df) + 1))
    return df
46
+
47
+
48
###########
### APP ###
###########
# Top-level Streamlit script: builds the MTEB leaderboard page.
st.set_page_config(layout="wide")
st.title("MTEB: Massive Text Embedding Benchmark")
st.markdown(
    """
    ...
    """
)
# Fetch every benchmark:mteb dataset from the Hub and flatten the results
# into one ranked DataFrame.
submissions = download_submissions()
df = format_submissions(submissions)
# NOTE(review): Styler.set_precision is deprecated in pandas >= 1.4
# (use Styler.format(precision=...)); confirm the pinned pandas version
# before upgrading.
styler = df.style.set_precision(3).set_properties(**{"white-space": "pre-wrap", "text-align": "center"})
# hack to remove index column: https://discuss.streamlit.io/t/questions-on-st-table/6878/3
st.markdown(
    """
    <style>
    table td:nth-child(1) {
        display: none
    }
    table th:nth-child(1) {
        display: none
    }
    </style>
    """,
    unsafe_allow_html=True,
)
st.table(styler)
tasks.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Task names for each MTEB task category. Order matters: TASKS below is the
# concatenation of these lists and defines the leaderboard column order.

# Text classification tasks.
TASK_LIST_CLASSIFICATION = [
    "AmazonCounterfactualClassification",
    "AmazonPolarityClassification",
    "AmazonReviewsClassification",
    "Banking77Classification",
    "EmotionClassification",
    "ImdbClassification",
    "MassiveIntentClassification",
    "MassiveScenarioClassification",
    "MTOPDomainClassification",
    "MTOPIntentClassification",
    "ToxicConversationsClassification",
    "TweetSentimentExtractionClassification",
]

# Clustering tasks (P2P = paragraph-to-paragraph, S2S = sentence-to-sentence).
TASK_LIST_CLUSTERING = [
    "ArxivClusteringP2P",
    "ArxivClusteringS2S",
    "BiorxivClusteringP2P",
    "BiorxivClusteringS2S",
    "MedrxivClusteringP2P",
    "MedrxivClusteringS2S",
    "RedditClustering",
    "RedditClusteringP2P",
    "StackExchangeClustering",
    "StackExchangeClusteringP2P",
    "TwentyNewsgroupsClustering",
]

# Pair classification tasks.
TASK_LIST_PAIR_CLASSIFICATION = [
    "SprintDuplicateQuestions",
    "TwitterSemEval2015",
    "TwitterURLCorpus",
]

# Reranking tasks.
TASK_LIST_RERANKING = [
    "AskUbuntuDupQuestions",
    "MindSmallReranking",
    "SciDocsRR",
    "StackOverflowDupQuestions",
]

# Retrieval tasks.
TASK_LIST_RETRIEVAL = [
    "ArguAna",
    "ClimateFEVER",
    "CQADupstackRetrieval",
    "DBPedia",
    "FEVER",
    "FiQA2018",
    "HotpotQA",
    "MSMARCO",
    "NFCorpus",
    "NQ",
    "QuoraRetrieval",
    "SCIDOCS",
    "SciFact",
    "Touche2020",
    "TRECCOVID",
]

# Semantic textual similarity tasks.
TASK_LIST_STS = [
    "BIOSSES",
    "SICK-R",
    "STS12",
    "STS13",
    "STS14",
    "STS15",
    "STS16",
    "STS17",
    "STS22",
    "STSBenchmark",
    "SummEval",
]

# Flat list of all task names, preserving category order.
TASKS = [
    *TASK_LIST_CLASSIFICATION,
    *TASK_LIST_CLUSTERING,
    *TASK_LIST_PAIR_CLASSIFICATION,
    *TASK_LIST_RERANKING,
    *TASK_LIST_RETRIEVAL,
    *TASK_LIST_STS,
]