Spaces:
Running
Running
""" | |
It provides a leaderboard component. | |
""" | |
from collections import defaultdict | |
import enum | |
import math | |
from typing import Dict, List, Tuple | |
import gradio as gr | |
import lingua | |
import db | |
from db import get_battles | |
SUPPORTED_LANGUAGES = [ | |
language.name.capitalize() for language in lingua.Language.all() | |
] | |
ANY_LANGUAGE = "Any" | |
class LeaderboardTab(enum.Enum): | |
SUMMARIZATION = "Summarization" | |
TRANSLATION = "Translation" | |
# Ref: https://colab.research.google.com/drive/1RAWb22-PFNI-X1gPVzc927SGUdfr6nsR?usp=sharing#scrollTo=QLGc6DwxyvQc pylint: disable=line-too-long | |
def compute_elo(battles: List[db.Battle], | |
k=4, | |
scale=400, | |
base=10, | |
initial_rating=1000) -> Dict[str, int]: | |
rating = defaultdict(lambda: initial_rating) | |
for battle in battles: | |
model_a, model_b, winner = battle.model_a, battle.model_b, battle.winner | |
rating_a = rating[model_a] | |
rating_b = rating[model_b] | |
expected_score_a = 1 / (1 + base**((rating_b - rating_a) / scale)) | |
expected_score_b = 1 / (1 + base**((rating_a - rating_b) / scale)) | |
scored_point_a = 0.5 if winner == "tie" else int(winner == "model_a") | |
rating[model_a] += k * (scored_point_a - expected_score_a) | |
rating[model_b] += k * (1 - scored_point_a - expected_score_b) | |
return {model: math.floor(rating + 0.5) for model, rating in rating.items()} | |
def load_elo_ratings(tab, source_lang: str, target_lang: str | None): | |
category = db.Category.SUMMARIZATION if tab == LeaderboardTab.SUMMARIZATION else db.Category.TRANSLATION | |
# TODO(#37): Call db.get_ratings and return the ratings if exists. | |
battles = get_battles(category, | |
None if source_lang == ANY_LANGUAGE else source_lang, | |
None if target_lang == ANY_LANGUAGE else target_lang) | |
if not battles: | |
return | |
computed_ratings = compute_elo(battles) | |
db.set_ratings( | |
category, | |
[db.Rating(model, rating) for model, rating in computed_ratings.items()], | |
source_lang, target_lang) | |
sorted_ratings = sorted( | |
computed_ratings.items(), | |
key=lambda x: x[1], # rating | |
reverse=True) | |
rank = 0 | |
last_rating = None | |
rating_rows = [] | |
for index, (model, rating) in enumerate(sorted_ratings): | |
if rating != last_rating: | |
rank = index + 1 | |
rating_rows.append([rank, model, rating]) | |
last_rating = rating | |
return rating_rows | |
LEADERBOARD_UPDATE_INTERVAL = 600 # 10 minutes | |
LEADERBOARD_INFO = "The leaderboard is updated every 10 minutes." | |
def update_filtered_leaderboard(tab: str, source_lang: str, | |
target_lang: str | None): | |
new_value = load_elo_ratings(tab, source_lang, target_lang) | |
return gr.update(value=new_value) | |
def build_leaderboard(): | |
with gr.Tabs(): | |
# Returns (original leaderboard, filtered leaderboard). | |
def toggle_leaderboard(language: str) -> Tuple[gr.Dataframe, gr.Dataframe]: | |
filter_chosen = language != ANY_LANGUAGE | |
return gr.Dataframe(visible=not filter_chosen), gr.Dataframe( | |
visible=filter_chosen) | |
with gr.Tab(LeaderboardTab.SUMMARIZATION.value): | |
summary_language = gr.Dropdown(choices=SUPPORTED_LANGUAGES + | |
[ANY_LANGUAGE], | |
value=ANY_LANGUAGE, | |
label="Summary language", | |
interactive=True) | |
filtered_summarization = gr.DataFrame( | |
headers=["Rank", "Model", "Elo rating"], | |
datatype=["number", "str", "number"], | |
value=lambda: load_elo_ratings(LeaderboardTab.SUMMARIZATION, | |
ANY_LANGUAGE, None), | |
elem_classes="leaderboard", | |
visible=False) | |
original_summarization = gr.Dataframe( | |
headers=["Rank", "Model", "Elo rating"], | |
datatype=["number", "str", "number"], | |
value=lambda: load_elo_ratings(LeaderboardTab.SUMMARIZATION, | |
ANY_LANGUAGE, None), | |
every=LEADERBOARD_UPDATE_INTERVAL, | |
elem_classes="leaderboard") | |
gr.Markdown(LEADERBOARD_INFO) | |
summary_language.change( | |
fn=update_filtered_leaderboard, | |
inputs=[ | |
gr.State(LeaderboardTab.SUMMARIZATION), summary_language, | |
gr.State(None) | |
], | |
outputs=filtered_summarization).then( | |
fn=toggle_leaderboard, | |
inputs=summary_language, | |
outputs=[original_summarization, filtered_summarization]) | |
with gr.Tab(LeaderboardTab.TRANSLATION.value): | |
with gr.Row(): | |
source_language = gr.Dropdown(choices=SUPPORTED_LANGUAGES + | |
[ANY_LANGUAGE], | |
label="Source language", | |
value=ANY_LANGUAGE, | |
interactive=True) | |
target_language = gr.Dropdown(choices=SUPPORTED_LANGUAGES + | |
[ANY_LANGUAGE], | |
label="Target language", | |
value=ANY_LANGUAGE, | |
interactive=True) | |
filtered_translation = gr.DataFrame( | |
headers=["Rank", "Model", "Elo rating"], | |
datatype=["number", "str", "number"], | |
value=lambda: load_elo_ratings(LeaderboardTab.TRANSLATION, | |
ANY_LANGUAGE, ANY_LANGUAGE), | |
elem_classes="leaderboard", | |
visible=False) | |
original_translation = gr.Dataframe( | |
headers=["Rank", "Model", "Elo rating"], | |
datatype=["number", "str", "number"], | |
value=lambda: load_elo_ratings(LeaderboardTab.TRANSLATION, | |
ANY_LANGUAGE, ANY_LANGUAGE), | |
every=LEADERBOARD_UPDATE_INTERVAL, | |
elem_classes="leaderboard") | |
gr.Markdown(LEADERBOARD_INFO) | |
source_language.change( | |
fn=update_filtered_leaderboard, | |
inputs=[ | |
gr.State(LeaderboardTab.TRANSLATION), source_language, | |
target_language | |
], | |
outputs=filtered_translation).then( | |
fn=toggle_leaderboard, | |
inputs=source_language, | |
outputs=[original_translation, filtered_translation]) | |
target_language.change( | |
fn=update_filtered_leaderboard, | |
inputs=[ | |
gr.State(LeaderboardTab.TRANSLATION), source_language, | |
target_language | |
], | |
outputs=filtered_translation).then( | |
fn=toggle_leaderboard, | |
inputs=target_language, | |
outputs=[original_translation, filtered_translation]) | |