File size: 3,067 Bytes
8b7c122
 
9b3b05a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e893bbb
9b3b05a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e893bbb
 
 
 
 
 
 
 
 
 
9b3b05a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# inspired by https://huggingface.co/spaces/ought/raft-leaderboard

import numpy as np
import pandas as pd
import requests
import streamlit as st
from tasks import TASKS
from huggingface_hub import HfApi
import datasets
import os

# Display names for the task columns; currently the very same object as TASKS,
# so renaming columns from TASKS to FORMATTED_TASK_NAMES leaves them unchanged.
FORMATTED_TASK_NAMES = TASKS
# Module-level Hugging Face Hub client, used by download_submissions().
api = HfApi()

def download_submissions():
    """Return the Hub datasets carrying the ``benchmark:mteb`` tag.

    The listing is requested through the module-level ``api`` client and is
    authenticated with the value of the ``HF_HUB_TOKEN`` environment variable
    (``None`` when the variable is not set).
    """
    hub_token = os.getenv("HF_HUB_TOKEN")
    return api.list_datasets(filter="benchmark:mteb", use_auth_token=hub_token)

@st.cache
def format_submissions(submissions):
    """Build the leaderboard table from a collection of Hub submissions.

    The CSV files of every submission are read with ``datasets.load_dataset``
    in streaming mode. Each CSV row is read through its ``model``, ``dataset``
    and ``value`` fields, and one leaderboard row is produced per model found
    in a submission.

    Args:
        submissions: Iterable of dataset-info objects exposing ``id``,
            ``author`` and ``lastModified``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Rank``, ``Submitter``,
        ``Submission Name``, ``Submission Date``, ``Overall`` followed by one
        column per entry of ``TASKS``, sorted by ``Overall`` in descending
        order. A task a model has no score for is left as NaN (and is skipped
        by the ``Overall`` mean); if a model/task pair occurs more than once
        in a submission, the last value wins.
    """
    base_columns = ["Submitter", "Submission Name", "Submission Date"]
    known_tasks = set(TASKS)
    rows = []
    for submission in submissions:
        submission_id = submission.id
        try:
            data = list(datasets.load_dataset(submission_id, streaming=True, data_files="*csv").values())[0]
        except FileNotFoundError:
            print("FileNotFoundError")
            continue

        # Records are collected per submission, keyed by model name. Building
        # whole rows (instead of independent per-column lists) keeps scores
        # attached to the right model even when a task is missing, and keying
        # per submission prevents a model from being dropped just because the
        # previous submission ended with a model of the same name.
        models = {}
        for line in data:
            model_name = line["model"]
            row = models.get(model_name)
            if row is None:
                row = {
                    "Submitter": submission.author,
                    "Submission Name": model_name,
                    "Submission Date": pd.to_datetime(submission.lastModified).strftime("%b %d, %Y"),
                }
                models[model_name] = row
            # Only datasets that are part of the benchmark become columns.
            if line["dataset"] in known_tasks:
                row[line["dataset"]] = line["value"]
        rows.extend(models.values())

    # Passing ``columns`` fixes the column order and fills absent tasks with NaN.
    df = pd.DataFrame(rows, columns=[*base_columns, *TASKS])
    df.insert(3, "Overall", df[TASKS].mean(axis=1))
    df = df.sort_values("Overall", ascending=False)
    df.rename(columns=dict(zip(TASKS, FORMATTED_TASK_NAMES)), inplace=True)
    # Start ranking from 1
    df.insert(0, "Rank", np.arange(1, len(df) + 1))
    return df


###########
### APP ###
###########
# Page header and submission instructions.
st.set_page_config(layout="wide")
st.title("MTEB: Massive Text Embedding Benchmark")
st.markdown(
    """
    To submit to MTEB, please follow the following instructions:
    - Publish your .csv MTEB scores to a public Hugging Face Hub Dataset. The .csv files must be at the root of the repo.
    - Add the following to the top of your model card:
    ```
    ---
    benchmark: mteb
    type: evaluation
    ---
    ```
    That's all! [Here's an example](https://huggingface.co/datasets/mteb/mteb-example-submission/tree/main) of how your repo should look like. You should now be able to see your results in the leaderboard below.
"""
)

# Fetch the tagged submissions and turn them into the leaderboard table.
submissions = download_submissions()
df = format_submissions(submissions)

# Styler.set_precision was deprecated in pandas 1.3 and removed in pandas 2.0;
# Styler.format(precision=...) is its replacement. Prefer the old method when
# it exists so that older pandas versions keep working unchanged.
styler = df.style
if hasattr(styler, "set_precision"):
    styler = styler.set_precision(3)
else:
    styler = styler.format(precision=3)
styler = styler.set_properties(**{"white-space": "pre-wrap", "text-align": "center"})

# hack to remove index column: https://discuss.streamlit.io/t/questions-on-st-table/6878/3
st.markdown(
    """
<style>
table td:nth-child(1) {
    display: none
}
table th:nth-child(1) {
    display: none
}
</style>
""",
    unsafe_allow_html=True,
)
st.table(styler)