Muennighoff committed on
Commit
b986a91
•
1 Parent(s): 3ae8f23
Files changed (1) hide show
  1. app.py +61 -21
app.py CHANGED
@@ -121,6 +121,20 @@ TASK_LIST_RETRIEVAL = [
121
  "TRECCOVID",
122
  ]
123
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  TASK_LIST_RETRIEVAL_NORM = TASK_LIST_RETRIEVAL + [
125
  "CQADupstackAndroidRetrieval",
126
  "CQADupstackEnglishRetrieval",
@@ -735,6 +749,7 @@ DATA_CLASSIFICATION_NB = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIF
735
  DATA_CLASSIFICATION_SV = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_SV)
736
  DATA_CLASSIFICATION_OTHER = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_OTHER)
737
  DATA_CLUSTERING_GERMAN = get_mteb_data(["Clustering"], [], TASK_LIST_CLUSTERING_DE)
 
738
  DATA_STS = get_mteb_data(["STS"])
739
 
740
  # Exact, add all non-nan integer values for every dataset
@@ -810,7 +825,7 @@ with block:
810
  with gr.TabItem("Danish"):
811
  with gr.Row():
812
  gr.Markdown("""
813
- **Bitext Mining Danish Leaderboard 🇩🇰🎌**
814
 
815
  - **Metric:** [F1](https://huggingface.co/spaces/evaluate-metric/f1)
816
  - **Languages:** Danish & Bornholmsk (Danish Dialect)
@@ -1072,26 +1087,51 @@ with block:
1072
  get_mteb_data, inputs=[task_reranking], outputs=data_reranking
1073
  )
1074
  with gr.TabItem("Retrieval"):
1075
- with gr.Row():
1076
- gr.Markdown("""
1077
- **Retrieval Leaderboard 🔎**
1078
-
1079
- - **Metric:** Normalized Discounted Cumulative Gain @ k (ndcg_at_10)
1080
- - **Languages:** English
1081
- """)
1082
- with gr.Row():
1083
- data_retrieval = gr.components.Dataframe(
1084
- DATA_RETRIEVAL,
1085
- # Add support for more columns than existing as a buffer for CQADupstack & other Retrieval tasks (e.g. MSMARCOv2)
1086
- datatype=["number", "markdown"] + ["number"] * len(DATA_RETRIEVAL.columns) * 2,
1087
- type="pandas",
1088
- )
1089
- with gr.Row():
1090
- data_run = gr.Button("Refresh")
1091
- task_retrieval = gr.Variable(value=["Retrieval"])
1092
- data_run.click(
1093
- get_mteb_data, inputs=[task_retrieval], outputs=data_retrieval
1094
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1095
  with gr.TabItem("STS"):
1096
  with gr.TabItem("English"):
1097
  with gr.Row():
 
121
  "TRECCOVID",
122
  ]
123
 
124
+ TASK_LIST_RETRIEVAL_PL = [
125
+ "ArguAna-PL",
126
+ "DBPedia-PL",
127
+ "FiQA2018-PL",
128
+ "HotpotQA-PL",
129
+ "MSMARCO-PL",
130
+ "NFCorpus-PL",
131
+ "NQ-PL",
132
+ "Quora-PL",
133
+ "SCIDOCS-PL",
134
+ "SciFact-PL",
135
+ "TRECCOVID-PL",
136
+ ]
137
+
138
  TASK_LIST_RETRIEVAL_NORM = TASK_LIST_RETRIEVAL + [
139
  "CQADupstackAndroidRetrieval",
140
  "CQADupstackEnglishRetrieval",
 
749
  DATA_CLASSIFICATION_SV = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_SV)
750
  DATA_CLASSIFICATION_OTHER = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_OTHER)
751
  DATA_CLUSTERING_GERMAN = get_mteb_data(["Clustering"], [], TASK_LIST_CLUSTERING_DE)
752
+ #DATA_RETRIEVAL_PL = get_mteb_data(["Retrieval"], [], TASK_LIST_RETRIEVAL_PL)
753
  DATA_STS = get_mteb_data(["STS"])
754
 
755
  # Exact, add all non-nan integer values for every dataset
 
825
  with gr.TabItem("Danish"):
826
  with gr.Row():
827
  gr.Markdown("""
828
+ **Bitext Mining Danish Leaderboard 🎌🇩🇰**
829
 
830
  - **Metric:** [F1](https://huggingface.co/spaces/evaluate-metric/f1)
831
  - **Languages:** Danish & Bornholmsk (Danish Dialect)
 
1087
  get_mteb_data, inputs=[task_reranking], outputs=data_reranking
1088
  )
1089
  with gr.TabItem("Retrieval"):
1090
+ with gr.TabItem("English"):
1091
+ with gr.Row():
1092
+ gr.Markdown("""
1093
+ **Retrieval Leaderboard 🔎**
1094
+
1095
+ - **Metric:** Normalized Discounted Cumulative Gain @ k (ndcg_at_10)
1096
+ - **Languages:** English
1097
+ """)
1098
+ with gr.Row():
1099
+ data_retrieval = gr.components.Dataframe(
1100
+ DATA_RETRIEVAL,
1101
+ # Add support for more columns than existing as a buffer for CQADupstack & other Retrieval tasks (e.g. MSMARCOv2)
1102
+ datatype=["number", "markdown"] + ["number"] * len(DATA_RETRIEVAL.columns) * 2,
1103
+ type="pandas",
1104
+ )
1105
+ with gr.Row():
1106
+ data_run = gr.Button("Refresh")
1107
+ task_retrieval = gr.Variable(value=["Retrieval"])
1108
+ data_run.click(
1109
+ get_mteb_data, inputs=[task_retrieval], outputs=data_retrieval
1110
+ )
1111
+ '''
1112
+ with gr.TabItem("Polish"):
1113
+ with gr.Row():
1114
+ gr.Markdown("""
1115
+ **Retrieval Polish Leaderboard 🔎🇵🇱**
1116
+
1117
+ - **Metric:** Normalized Discounted Cumulative Gain @ k (ndcg_at_10)
1118
+ - **Languages:** Polish
1119
+ - **Credits:** [Konrad Wojtasik](https://github.com/kwojtasi) & [BEIR-PL](https://arxiv.org/abs/2305.19840)
1120
+ """)
1121
+ with gr.Row():
1122
+ data_retrieval_pl = gr.components.Dataframe(
1123
+ DATA_RETRIEVAL_PL,
1124
+ # Add support for more columns than existing as a buffer for CQADupstack & other Retrieval tasks (e.g. MSMARCOv2)
1125
+ datatype=["number", "markdown"] + ["number"] * len(DATA_RETRIEVAL_PL.columns) * 2,
1126
+ type="pandas",
1127
+ )
1128
+ with gr.Row():
1129
+ data_run = gr.Button("Refresh")
1130
+ task_retrieval_pl = gr.Variable(value=["Retrieval"])
1131
+ data_run.click(
1132
+ get_mteb_data, inputs=[task_retrieval_pl], outputs=data_retrieval_pl
1133
+ )
1134
+ '''
1135
  with gr.TabItem("STS"):
1136
  with gr.TabItem("English"):
1137
  with gr.Row():