albertvillanova HF staff committed on
Commit
8e404a5
•
1 Parent(s): 1c1cb58

Fix missing results by reading all files

Browse files
Files changed (2) hide show
  1. app.py +5 -5
  2. src/results.py +14 -10
app.py CHANGED
@@ -7,12 +7,12 @@ from src.details import update_subtasks_component, update_load_details_component
7
  display_details, update_sample_idx_component, clear_details
8
  from src.results import update_load_results_component, \
9
  load_results_dataframes, display_results, update_tasks_component, clear_results, \
10
- filter_latest_result_path_per_model, fetch_result_paths
11
 
12
 
13
  # if __name__ == "__main__":
14
- latest_result_path_per_model = filter_latest_result_path_per_model(fetch_result_paths())
15
- load_results_dataframes = partial(load_results_dataframes, result_path_per_model=latest_result_path_per_model)
16
 
17
  with gr.Blocks(fill_height=True) as demo:
18
  gr.HTML("<h1 style='text-align: center;'>Compare Results of the 🤗 Open LLM Leaderboard</h1>")
@@ -20,10 +20,10 @@ with gr.Blocks(fill_height=True) as demo:
20
 
21
  with gr.Row():
22
  with gr.Column():
23
- model_id_1 = gr.Dropdown(choices=list(latest_result_path_per_model.keys()), label="Models")
24
  dataframe_1 = gr.Dataframe(visible=False)
25
  with gr.Column():
26
- model_id_2 = gr.Dropdown(choices=list(latest_result_path_per_model.keys()), label="Models")
27
  dataframe_2 = gr.Dataframe(visible=False)
28
 
29
  with gr.Row():
 
7
  display_details, update_sample_idx_component, clear_details
8
  from src.results import update_load_results_component, \
9
  load_results_dataframes, display_results, update_tasks_component, clear_results, \
10
+ sort_result_paths_per_model, fetch_result_paths
11
 
12
 
13
  # if __name__ == "__main__":
14
+ result_paths_per_model = sort_result_paths_per_model(fetch_result_paths())
15
+ load_results_dataframes = partial(load_results_dataframes, result_paths_per_model=result_paths_per_model)
16
 
17
  with gr.Blocks(fill_height=True) as demo:
18
  gr.HTML("<h1 style='text-align: center;'>Compare Results of the 🤗 Open LLM Leaderboard</h1>")
 
20
 
21
  with gr.Row():
22
  with gr.Column():
23
+ model_id_1 = gr.Dropdown(choices=list(result_paths_per_model.keys()), label="Models")
24
  dataframe_1 = gr.Dataframe(visible=False)
25
  with gr.Column():
26
+ model_id_2 = gr.Dropdown(choices=list(result_paths_per_model.keys()), label="Models")
27
  dataframe_2 = gr.Dataframe(visible=False)
28
 
29
  with gr.Row():
src/results.py CHANGED
@@ -13,35 +13,39 @@ def fetch_result_paths():
13
  return paths
14
 
15
 
16
- def filter_latest_result_path_per_model(paths):
17
  from collections import defaultdict
18
 
19
  d = defaultdict(list)
20
  for path in paths:
21
  model_id, _ = path[len(RESULTS_DATASET_ID) + 1:].rsplit("/", 1)
22
  d[model_id].append(path)
23
- return {model_id: max(paths) for model_id, paths in d.items()}
24
 
25
 
26
  def update_load_results_component():
27
  return (gr.Button("Load", interactive=True), ) * 2
28
 
29
 
30
- def load_results_dataframe(model_id, result_path_per_model=None):
31
- if not model_id or not result_path_per_model:
32
  return
33
- result_path = result_path_per_model[model_id]
34
  fs = HfFileSystem()
35
- with fs.open(result_path, "r") as f:
36
- data = json.load(f)
37
- model_name = data.get("model_name", "Model")
 
 
 
 
38
  df = pd.json_normalize([{key: value for key, value in data.items()}])
39
  # df.columns = df.columns.str.split(".") # .split return a list instead of a tuple
40
  return df.set_index(pd.Index([model_name])).reset_index()
41
 
42
 
43
- def load_results_dataframes(*model_ids, result_path_per_model=None):
44
- return [load_results_dataframe(model_id, result_path_per_model=result_path_per_model) for model_id in model_ids]
45
 
46
 
47
  def display_results(task, *dfs):
 
13
  return paths
14
 
15
 
16
+ def sort_result_paths_per_model(paths):
17
  from collections import defaultdict
18
 
19
  d = defaultdict(list)
20
  for path in paths:
21
  model_id, _ = path[len(RESULTS_DATASET_ID) + 1:].rsplit("/", 1)
22
  d[model_id].append(path)
23
+ return {model_id: sorted(paths) for model_id, paths in d.items()}
24
 
25
 
26
  def update_load_results_component():
27
  return (gr.Button("Load", interactive=True), ) * 2
28
 
29
 
30
+ def load_results_dataframe(model_id, result_paths_per_model=None):
31
+ if not model_id or not result_paths_per_model:
32
  return
33
+ result_paths = result_paths_per_model[model_id]
34
  fs = HfFileSystem()
35
+ data = {"results": {}, "configs": {}}
36
+ for path in result_paths:
37
+ with fs.open(path, "r") as f:
38
+ d = json.load(f)
39
+ data["results"].update(d["results"])
40
+ data["configs"].update(d["configs"])
41
+ model_name = d.get("model_name", "Model")
42
  df = pd.json_normalize([{key: value for key, value in data.items()}])
43
  # df.columns = df.columns.str.split(".") # .split return a list instead of a tuple
44
  return df.set_index(pd.Index([model_name])).reset_index()
45
 
46
 
47
+ def load_results_dataframes(*model_ids, result_paths_per_model=None):
48
+ return [load_results_dataframe(model_id, result_paths_per_model=result_paths_per_model) for model_id in model_ids]
49
 
50
 
51
  def display_results(task, *dfs):