albertvillanova HF staff committed on
Commit
05c90f4
1 Parent(s): ed0883d

Add Results tab

Browse files
Files changed (1) hide show
  1. app.py +90 -22
app.py CHANGED
@@ -6,7 +6,17 @@ from huggingface_hub import HfFileSystem
6
 
7
 
8
  RESULTS_DATASET_ID = "datasets/open-llm-leaderboard/results"
9
-
 
 
 
 
 
 
 
 
 
 
10
 
11
  fs = HfFileSystem()
12
 
@@ -30,24 +40,73 @@ def get_result_path_from_model(model_id, result_path_per_model):
30
  return result_path_per_model[model_id]
31
 
32
 
33
- def load_result(result_path) -> pd.DataFrame:
34
  with fs.open(result_path, "r") as f:
35
  data = json.load(f)
 
 
 
 
 
 
 
 
 
36
  model_name = data.get("model_name", "Model")
37
- df = pd.json_normalize([data])
 
 
 
 
 
 
 
 
 
 
 
38
  return df.iloc[0].rename_axis("Parameters").rename(model_name).to_frame() # .reset_index()
39
 
40
 
41
- def render_result_1(model_id, results):
42
- result_path = get_result_path_from_model(model_id, latest_result_path_per_model)
43
- result = load_result(result_path)
44
- return pd.concat([result, results.iloc[:, [0, 2]].set_index("Parameters")], axis=1).reset_index()
45
 
46
 
47
- def render_result_2(model_id, results):
48
- result_path = get_result_path_from_model(model_id, latest_result_path_per_model)
49
- result = load_result(result_path)
50
- return pd.concat([results.iloc[:, [0, 1]].set_index("Parameters"), result], axis=1).reset_index()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
 
53
  # if __name__ == "__main__":
@@ -66,23 +125,32 @@ with gr.Blocks(fill_height=True) as demo:
66
  load_btn_2 = gr.Button("Load")
67
 
68
  with gr.Row():
69
- compared_results = gr.Dataframe(
70
- label="Results",
71
- headers=["Parameters", "Result-1", "Result-2"],
72
- interactive=False,
73
- column_widths=["30%", "30%", "30%"],
74
- wrap=True
75
- )
 
 
 
 
 
 
 
 
 
76
 
77
  load_btn_1.click(
78
  fn=render_result_1,
79
- inputs=[model_id_1, compared_results],
80
- outputs=compared_results,
81
  )
82
  load_btn_2.click(
83
  fn=render_result_2,
84
- inputs=[model_id_2, compared_results],
85
- outputs=compared_results,
86
  )
87
 
88
  demo.launch()
 
6
 
7
 
8
# Identifier of the results dataset repository.
RESULTS_DATASET_ID = "datasets/open-llm-leaderboard/results"

# Top-level keys of a result file that to_dataframe_all() leaves out.
EXCLUDED_KEYS = {"pretty_env_info", "chat_template", "group_subtasks"}

# Entries of data["results"] that to_dataframe_results() skips entirely.
EXCLUDED_RESULTS_KEYS = {"leaderboard"}

# Keys dropped from each individual entry of data["results"].
EXCLUDED_RESULTS_LEADERBOARDS_KEYS = {"leaderboard"}
20
 
21
  fs = HfFileSystem()
22
 
 
40
  return result_path_per_model[model_id]
41
 
42
 
43
def load_data(result_path) -> dict:
    """Read a JSON result file and return its decoded content.

    Args:
        result_path: Path of the JSON file; it is opened in text mode through
            the module-level ``fs`` filesystem object.

    Returns:
        The parsed JSON document. Callers in this file use it as a dict
        (``data.get(...)``, ``data["results"]``).
    """
    with fs.open(result_path, "r") as f:
        return json.load(f)
50
+
51
+
52
def load_result(model_id):
    """Load the latest result of a model as two vertical tables.

    Args:
        model_id: Key used to look up the result path in
            ``latest_result_path_per_model``.

    Returns:
        A two-item list: the flattened view of the whole result file, then the
        view restricted to its ``"results"`` section. Both frames have a single
        column named after the model (``"Model"`` when the file has no
        ``"model_name"`` entry).
    """
    path = get_result_path_from_model(model_id, latest_result_path_per_model)
    payload = load_data(path)
    column_name = payload.get("model_name", "Model")
    all_view = to_vertical(to_dataframe_all(payload), column_name)
    results_view = to_vertical(to_dataframe_results(payload), column_name)
    return [all_view, results_view]
61
+
62
+
63
def to_dataframe(data):
    """Build a one-row DataFrame whose columns are the keys of ``data``."""
    records = [data]
    return pd.DataFrame.from_records(records)
65
+
66
+
67
def to_vertical(df, model_name):
    """Turn the first row of ``df`` into a single-column frame.

    The original column labels become the index (named ``"Parameters"``) and
    the only column is named ``model_name``.
    """
    first_row = df.iloc[0]
    labelled = first_row.rename_axis("Parameters")
    named = labelled.rename(model_name)
    return named.to_frame()
69
 
70
 
71
def to_dataframe_all(data):
    """Flatten ``data`` into a one-row DataFrame, skipping ``EXCLUDED_KEYS``.

    Nested dictionaries are expanded by ``pd.json_normalize`` into
    dot-separated column names.
    """
    kept = {}
    for field, content in data.items():
        if field in EXCLUDED_KEYS:
            continue
        kept[field] = content
    return pd.json_normalize([kept])
 
 
73
 
74
 
75
def to_dataframe_results(data):
    """Build a one-row DataFrame from the ``"results"`` section of ``data``.

    Each entry of ``data["results"]`` not listed in ``EXCLUDED_RESULTS_KEYS``
    contributes one column per item, named ``"<name>.<item key>"``. ``<name>``
    is the entry key with a leading ``"leaderboard_"`` removed. Item keys
    listed in ``EXCLUDED_RESULTS_LEADERBOARDS_KEYS`` are dropped.

    Args:
        data: Parsed result file; must contain a ``"results"`` mapping whose
            values are mappings themselves.

    Returns:
        A single-row DataFrame. When no entry remains after filtering, the
        frame has one row and no columns, so ``.iloc[0]`` still works.

    Raises:
        KeyError: If ``data`` has no ``"results"`` key.
    """
    prefix = "leaderboard_"
    dfs = {}
    for task_key, task_values in data["results"].items():
        if task_key in EXCLUDED_RESULTS_KEYS:
            continue
        # Strip the prefix only when it is really there; slicing blindly would
        # chop the first characters off keys that do not carry it.
        name = task_key[len(prefix):] if task_key.startswith(prefix) else task_key
        df = to_dataframe(
            {
                metric: value
                for metric, value in task_values.items()
                if metric not in EXCLUDED_RESULTS_LEADERBOARDS_KEYS
            }
        )
        df.columns = [f"{name}.{column}" for column in df.columns]
        dfs[name] = df
    if not dfs:
        # pd.concat raises ValueError on an empty sequence; return a one-row,
        # zero-column frame instead.
        return pd.DataFrame(index=range(1))
    return pd.concat(dfs.values(), axis="columns")
92
+
93
+
94
def concat_result_1(result_1, results):
    """Put ``result_1`` next to the third column of the displayed table.

    Columns 0 and 2 of ``results`` are kept, with ``"Parameters"`` as the
    index. The kept frame is joined column-wise to the right of ``result_1``,
    and the index is turned back into a regular column.
    """
    other_model = results.iloc[:, [0, 2]].set_index("Parameters")
    combined = pd.concat([result_1, other_model], axis=1)
    return combined.reset_index()
96
+
97
+
98
def concat_result_2(result_2, results):
    """Put ``result_2`` next to the second column of the displayed table.

    Columns 0 and 1 of ``results`` are kept, with ``"Parameters"`` as the
    index. ``result_2`` is joined column-wise to the right of the kept frame,
    and the index is turned back into a regular column.
    """
    other_model = results.iloc[:, [0, 1]].set_index("Parameters")
    combined = pd.concat([other_model, result_2], axis=1)
    return combined.reset_index()
100
+
101
+
102
def render_result_1(model_id, *results):
    """Reload model 1 and refresh every displayed table with it.

    Args:
        model_id: Model whose latest result is loaded via ``load_result``.
        *results: Currently displayed tables, in the order ``load_result``
            returns its views.

    Returns:
        A list with one updated table per (loaded view, displayed table) pair.
    """
    loaded_views = load_result(model_id)
    # map() over two iterables stops at the shorter one, like zip().
    return list(map(concat_result_1, loaded_views, results))
105
+
106
+
107
def render_result_2(model_id, *results):
    """Reload model 2 and refresh every displayed table with it.

    Args:
        model_id: Model whose latest result is loaded via ``load_result``.
        *results: Currently displayed tables, in the order ``load_result``
            returns its views.

    Returns:
        A list with one updated table per (loaded view, displayed table) pair.
    """
    loaded_views = load_result(model_id)
    # map() over two iterables stops at the shorter one, like zip().
    return list(map(concat_result_2, loaded_views, results))
110
 
111
 
112
  # if __name__ == "__main__":
 
125
  load_btn_2 = gr.Button("Load")
126
 
127
  with gr.Row():
128
+ with gr.Tab("All"):
129
+ compared_results_all = gr.Dataframe(
130
+ label="Results",
131
+ headers=["Parameters", "Model-1", "Model-2"],
132
+ interactive=False,
133
+ column_widths=["30%", "30%", "30%"],
134
+ wrap=True,
135
+ )
136
+ with gr.Tab("Results"):
137
+ compared_results_results = gr.Dataframe(
138
+ label="Results",
139
+ headers=["Parameters", "Model-1", "Model-2"],
140
+ interactive=False,
141
+ column_widths=["30%", "30%", "30%"],
142
+ wrap=True,
143
+ )
144
 
145
  load_btn_1.click(
146
  fn=render_result_1,
147
+ inputs=[model_id_1, compared_results_all, compared_results_results],
148
+ outputs=[compared_results_all, compared_results_results],
149
  )
150
  load_btn_2.click(
151
  fn=render_result_2,
152
+ inputs=[model_id_2, compared_results_all, compared_results_results],
153
+ outputs=[compared_results_all, compared_results_results],
154
  )
155
 
156
  demo.launch()