Spaces:

open-llm-leaderboard
/

comparator

Running

App Files Files Community

comparator / src /results.py

albertvillanova HF staff

Fix missing results by reading all files

8e404a5 verified 10 days ago

raw

history blame

3.27 kB

	import json

	import gradio as gr
	import pandas as pd
	from huggingface_hub import HfFileSystem

	from src.constants import RESULTS_DATASET_ID, TASKS


	def fetch_result_paths():
	    """List every JSON result file stored in the results dataset.

	    Returns:
	        The paths returned by ``HfFileSystem.glob`` for JSON files located below
	        ``RESULTS_DATASET_ID``.
	    """
	    fs = HfFileSystem()
	    # The wildcard segments must be spelled out: a pattern with empty path segments
	    # ("///*.json") does not descend into the per-model directories.
	    return fs.glob(f"{RESULTS_DATASET_ID}/**/**/*.json")


	def sort_result_paths_per_model(paths):
	    """Group result file paths by model id.

	    The model id is the part of each path between the ``RESULTS_DATASET_ID`` prefix (plus its
	    trailing slash) and the final ``/``-separated component.

	    Args:
	        paths: Iterable of path strings that start with ``RESULTS_DATASET_ID``.

	    Returns:
	        A dict mapping each model id to the sorted list of its paths.
	    """
	    grouped = {}
	    for path in paths:
	        relative = path[len(RESULTS_DATASET_ID) + 1:]
	        model_id, _filename = relative.rsplit("/", 1)
	        grouped.setdefault(model_id, []).append(path)
	    return {model_id: sorted(model_paths) for model_id, model_paths in grouped.items()}


	def update_load_results_component():
	    """Return a 2-tuple holding the same interactive "Load" button twice."""
	    enabled_button = gr.Button("Load", interactive=True)
	    return enabled_button, enabled_button


	def load_results_dataframe(model_id, result_paths_per_model=None):
	    """Load and merge all result files of one model into a single-row DataFrame.

	    Args:
	        model_id: Key into ``result_paths_per_model``.
	        result_paths_per_model: Mapping from model id to a list of result file paths.

	    Returns:
	        ``None`` when either argument is empty. Otherwise a one-row DataFrame built with
	        ``pd.json_normalize`` from the merged "results" and "configs" entries, with an
	        ``index`` column holding the model name.

	    Raises:
	        KeyError: If ``model_id`` is missing from the mapping, or a file has no
	            "results" or "configs" entry.
	    """
	    if not model_id or not result_paths_per_model:
	        return None
	    result_paths = result_paths_per_model[model_id]
	    fs = HfFileSystem()
	    data = {"results": {}, "configs": {}}
	    # Set the fallback before the loop so an empty path list cannot leave the name unbound.
	    model_name = "Model"
	    for path in result_paths:
	        with fs.open(path, "r") as f:
	            d = json.load(f)
	        # Later files override earlier ones on key collisions.
	        data["results"].update(d["results"])
	        data["configs"].update(d["configs"])
	        model_name = d.get("model_name", "Model")
	    df = pd.json_normalize([data])
	    # NOTE: column names are kept as dotted strings; splitting them with
	    # `df.columns.str.split(".")` would yield lists instead of tuples.
	    return df.set_index(pd.Index([model_name])).reset_index()


	def load_results_dataframes(*model_ids, result_paths_per_model=None):
	    """Call ``load_results_dataframe`` for each model id and collect the results in order.

	    Args:
	        *model_ids: Model ids to load.
	        result_paths_per_model: Forwarded unchanged to ``load_results_dataframe``.

	    Returns:
	        A list with one entry per model id, in the order given.
	    """
	    frames = []
	    for model_id in model_ids:
	        frame = load_results_dataframe(model_id, result_paths_per_model=result_paths_per_model)
	        frames.append(frame)
	    return frames


	def display_results(task, *dfs):
	    """Render the "results" and "configs" tabs for the given dataframes.

	    Dataframes without an "index" column are ignored. The remaining ones are re-indexed on
	    that column, concatenated and transposed before being passed to ``display_tab``.

	    Args:
	        task: Task filter forwarded to ``display_tab``.
	        *dfs: DataFrames to combine.

	    Returns:
	        ``(None, None)`` when no dataframe has an "index" column, otherwise the pair
	        ``(results_output, configs_output)`` produced by ``display_tab``.
	    """
	    indexed = []
	    for frame in dfs:
	        if "index" in frame.columns:
	            indexed.append(frame.set_index("index"))
	    if len(indexed) == 0:
	        return None, None
	    combined = pd.concat(indexed).T.rename_axis(columns=None)
	    results_html = display_tab("results", combined, task)
	    configs_html = display_tab("configs", combined, task)
	    return results_html, configs_html


	def display_tab(tab, df, task):
	df = df.style.format(na_rep="")
	df.hide(
	[
	row
	for row in df.index
	if (
	not row.startswith(f"{tab}.")
	or row.startswith(f"{tab}.leaderboard.")
	or row.endswith(".alias")
	or (not row.startswith(f"{tab}.{task}") if task != "All" else False)
	)
	],
	axis="index",
	)
	start = len(f"{tab}.leaderboard_") if task == "All" else len(f"{tab}.{task} ")
	df.format_index(lambda idx: idx[start:].removesuffix(",none"), axis="index")
	return df.to_html()


	def update_tasks_component():
	    """Return a 2-tuple holding the same visible "Tasks" radio twice.

	    The radio offers "All" followed by the values of ``TASKS`` and has "All" selected.
	    """
	    choices = ["All"] + list(TASKS.values())
	    tasks_radio = gr.Radio(
	        choices,
	        label="Tasks",
	        info="Evaluation tasks to be displayed",
	        value="All",
	        visible=True,
	    )
	    return tasks_radio, tasks_radio


	def clear_results():
	    """Return the reset value for each of the eight outputs.

	    Output order: model_id_1, model_id_2, dataframe_1, dataframe_2, load_results_btn,
	    load_configs_btn, results_task, configs_task.

	    Returns:
	        An 8-tuple: four ``None`` values, the same disabled "Load" button twice, and the same
	        hidden "Tasks" radio twice.
	    """
	    disabled_button = gr.Button("Load", interactive=False)
	    hidden_tasks_radio = gr.Radio(
	        ["All"] + list(TASKS.values()),
	        label="Tasks",
	        info="Evaluation tasks to be displayed",
	        value="All",
	        visible=False,
	    )
	    # Each component is repeated so the tuple stays flat and matches the eight outputs.
	    return (
	        None, None, None, None,
	        disabled_button, disabled_button,
	        hidden_tasks_radio, hidden_tasks_radio,
	    )