attilabalint committed on
Commit
3d3e872
1 Parent(s): e11310d

initial commit

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .streamlit/
app.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ from components import buildings_view, models_view, performance_view, computation_view
4
+ import utils
5
+
6
# Browser-tab title and full-width layout for the whole dashboard.
st.set_page_config(layout="wide", page_title="Pv Generation Dashboard")

# Names of the views offered by the sidebar selector, in display order.
PAGES = ["Buildings", "Models", "Performance", "Computational Resources"]
14
+
15
+
16
@st.cache_data(ttl=24 * 60 * 60)
def fetch_data():
    """Download the "metrics" runs of the PV generation project from W&B.

    The W&B entity and API key are read from the Streamlit secrets. The
    result is cached by Streamlit for 24 hours (86400 seconds), so the W&B
    API is not queried on every script rerun.
    """
    entity = st.secrets["wandb_entity"]
    api_key = st.secrets["wandb_api_key"]
    return utils.get_wandb_data(
        entity=entity,
        project="enfobench-pv-generation",
        api_key=api_key,
        job_type="metrics",
    )
24
+
25
+
26
# Load the run table and collect the distinct model identifiers it contains.
data = fetch_data()
models = sorted(data["model"].unique().tolist())

# Filled in by the sidebar check boxes with the models the user ticked.
models_to_plot = set()

# Model identifiers are split on the first "." into "<group>.<name>";
# bucket the names by their group so the sidebar can list them per group.
model_groups: dict[str, list[str]] = {}
for model in models:
    group, model_name = model.split(".", maxsplit=1)
    model_groups.setdefault(group, []).append(model_name)
36
+
37
+
38
with st.sidebar:
    # Two side-by-side columns inside the sidebar, one per logo.
    left, right = st.columns(2)
    with left:
        st.image("./images/ku_leuven_logo.png")
    with right:
        st.image("./images/energyville_logo.png")

    view = st.selectbox("View", PAGES, index=0)

    st.header("Models to include")
    left, right = st.columns(2)
    with left:
        select_none = st.button("Select None", use_container_width=True)
    with right:
        select_all = st.button("Select All", use_container_width=True)

    # The bulk buttons overwrite the check-box state through Session State.
    # This has to happen before the check boxes below are instantiated.
    if select_none:
        for model in models:
            st.session_state[model] = False
    if select_all:
        for model in models:
            st.session_state[model] = True

    # One check box per model, listed under its group heading.
    # `group_models` is deliberately not called `models`, so the module-level
    # list of full model identifiers is not rebound by this loop.
    for model_group, group_models in model_groups.items():
        st.text(model_group)
        for model_name in group_models:
            model_key = f"{model_group}.{model_name}"
            # Seed the "checked" default through Session State instead of
            # `value=True`: combining a widget default with a Session State
            # write for the same key makes Streamlit emit a warning.
            if model_key not in st.session_state:
                st.session_state[model_key] = True
            if st.checkbox(model_name, key=model_key):
                models_to_plot.add(model_key)
68
+
69
+
70
# Page heading. This dashboard shows the PV generation benchmark (see the
# page title and the W&B project name), not the electricity demand one.
st.title("EnFoBench - PV Generation")
st.divider()

# Render the view picked in the sidebar. The two model-dependent views also
# receive the set of models ticked in the sidebar.
if view == "Buildings":
    buildings_view(data)
elif view == "Models":
    models_view(data)
elif view == "Performance":
    performance_view(data, models_to_plot)
elif view == "Computational Resources":
    computation_view(data, models_to_plot)
else:
    st.write("Not implemented yet")
components.py ADDED
@@ -0,0 +1,280 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import streamlit as st
3
+ import plotly.express as px
4
+
5
+
6
def buildings_view(data: pd.DataFrame):
    """Render the "Buildings" view: a count metric and a per-building table.

    Args:
        data: Run table containing at least the columns ``unique_id``,
            ``metadata.location_id``, ``metadata.timezone``,
            ``dataset.available_history.days`` and ``metadata.ac_capacity``.
    """
    # One row per `unique_id`, taking the first value of every column.
    buildings = (
        data[
            [
                "unique_id",
                "metadata.location_id",
                "metadata.timezone",
                "dataset.available_history.days",
                "metadata.ac_capacity",
            ]
        ]
        .groupby("unique_id")
        .first()
        .rename(
            columns={
                "metadata.location_id": "Location ID",
                "metadata.timezone": "Timezone",
                "dataset.available_history.days": "Available history (days)",
                "metadata.ac_capacity": "Capacity (kW)",
            }
        )
    )

    st.metric("Number of buildings", len(buildings))
    st.divider()

    st.markdown("### Buildings")
    st.dataframe(
        buildings,
        use_container_width=True,
        column_config={
            # Progress bars are scaled to the largest value in each column.
            "Available history (days)": st.column_config.ProgressColumn(
                "Available history (days)",
                help="Available training data during the first prediction.",
                format="%f",
                min_value=0,
                max_value=float(buildings["Available history (days)"].max()),
            ),
            "Capacity (kW)": st.column_config.ProgressColumn(
                "Capacity (kW)",
                # Previously this repeated the history column's help text.
                help="AC capacity of the installation in kW.",
                format="%f",
                min_value=0,
                max_value=float(buildings["Capacity (kW)"].max()),
            ),
        },
    )
53
+
54
+
55
def models_view(data):
    """Render the "Models" view.

    Shows the number of models, a table with one row per model (CV settings
    and image information) and two pie charts: the share of each variate
    type and the share of each framework (the part of the model identifier
    before the first ".").
    """
    renamed_columns = {
        "cv_config.folds": "CV Folds",
        "cv_config.horizon": "CV Horizon",
        "cv_config.step": "CV Step",
        "cv_config.time": "CV Time",
        "model_info.repository": "Image Repository",
        "model_info.tag": "Image Tag",
        "model_info.variate_type": "Variate type",
    }
    selected = data[["model", *renamed_columns]]
    # One row per model, taking the first value of every column.
    models = selected.groupby("model").first().rename(columns=renamed_columns)

    def share_pie(frame, column):
        # `size().reset_index()` leaves the group counts in a column named 0.
        counts = frame.groupby(column).size().reset_index()
        return px.pie(counts, values=0, names=column)

    st.metric("Number of models", len(models))
    st.divider()

    st.markdown("### Models")
    st.dataframe(models, use_container_width=True)

    variate_col, framework_col = st.columns(2, gap="large")
    with variate_col:
        st.markdown("#### Variate types")
        st.plotly_chart(share_pie(models, "Variate type"), use_container_width=True)

    with framework_col:
        st.markdown("#### Frameworks")
        with_framework = models.assign(Framework=models.index.str.split(".").str[0])
        st.plotly_chart(share_pie(with_framework, "Framework"), use_container_width=True)
110
+
111
+
112
def performance_view(data: pd.DataFrame, models_to_plot: set[str]):
    """Render the "Performance" view for the selected models.

    The view consists of a box plot of one metric per model, a scatter plot
    comparing two metrics, a table of the metric statistics per model, and a
    model-by-building table of the metric.

    Args:
        data: Run table with a ``model`` column, a ``unique_id`` column and
            metric columns named ``"<metric>.<aggregation>"``.
        models_to_plot: Model identifiers to include; all other rows of
            ``data`` are ignored.
    """
    metrics = ["MAE", "RMSE", "MBE", "rMAE"]
    aggregations = ["min", "mean", "median", "max", "std"]

    data_to_plot = data[data["model"].isin(models_to_plot)].sort_values(
        by="model", ascending=True
    )

    # With nothing to plot, the box-plot height below would be 0, which
    # Plotly rejects. Show a message instead of raising.
    if data_to_plot.empty:
        st.warning("No data to display for the selected models.")
        return

    def style_table(table: pd.DataFrame, gradient_axis):
        # Colour gradient along `gradient_axis` (None = whole table), two
        # decimals, and centred, slightly larger text.
        styler = table.style
        styler.background_gradient(cmap="seismic", axis=gradient_axis)
        styler.format(precision=2)
        styler.map(lambda _: "text-align: center; font-size: 14px;")
        return styler

    # --- Box plot: distribution of one metric per model -------------------
    left, right = st.columns(2, gap="small")
    with left:
        metric = st.selectbox("Metric", metrics, index=0)
    with right:
        aggregation = st.selectbox("Aggregation", aggregations, index=1)
    st.markdown(f"#### {aggregation.capitalize()} {metric} per building")
    fig = px.box(
        data_to_plot,
        x=f"{metric}.{aggregation}",
        y="model",
        color="model",
        points="all",
    )
    # The height grows with the number of selected models.
    fig.update_layout(showlegend=False, height=40 * len(models_to_plot))
    st.plotly_chart(fig, use_container_width=True)

    st.divider()

    # --- Scatter plot: one metric against another --------------------------
    left, right = st.columns(2, gap="large")
    with left:
        x_metric = st.selectbox("Metric", metrics, index=0, key="x_metric")
        x_aggregation = st.selectbox(
            "Aggregation", aggregations, index=1, key="x_aggregation"
        )
    with right:
        # This selector picks a metric; it was previously mislabelled
        # "Aggregation".
        y_metric = st.selectbox("Metric", metrics, index=1, key="y_metric")
        y_aggregation = st.selectbox(
            "Aggregation", aggregations, index=1, key="y_aggregation"
        )

    st.markdown(
        f"#### {x_aggregation.capitalize()} {x_metric} vs {y_aggregation.capitalize()} {y_metric}"
    )
    fig = px.scatter(
        data_to_plot,
        x=f"{x_metric}.{x_aggregation}",
        y=f"{y_metric}.{y_aggregation}",
        color="model",
    )
    fig.update_layout(height=600)
    st.plotly_chart(fig, use_container_width=True)

    st.divider()

    # --- Tables -------------------------------------------------------------
    left, right = st.columns(2, gap="small")
    with left:
        metric = st.selectbox("Metric", metrics, index=0, key="table_metric")
    with right:
        aggregation = st.selectbox(
            "Aggregation across folds",
            aggregations,
            index=1,
            key="table_aggregation",
        )

    # Per model: the chosen aggregation of every "<metric>.<stat>" column.
    per_model_table = data_to_plot.groupby(["model"]).agg(
        aggregation, numeric_only=True
    )[[f"{metric}.{stat}" for stat in aggregations]]

    st.markdown(f"#### {aggregation.capitalize()} {metric} stats per model")
    # Gradient per column (axis=0): each statistic is coloured on its own scale.
    st.dataframe(style_table(per_model_table, 0), use_container_width=True)

    # Model x building matrix of the "<metric>.<aggregation>" column.
    per_building_table = (
        data_to_plot.groupby(["model", "unique_id"])
        .agg(aggregation, numeric_only=True)
        .reset_index()
        .pivot(index="model", columns="unique_id", values=f"{metric}.{aggregation}")
    )

    st.markdown(f"#### {aggregation.capitalize()} {metric} stats per building")
    # Gradient over the whole table (axis=None): all cells share one scale.
    st.dataframe(style_table(per_building_table, None), use_container_width=True)
226
+
227
+
228
def computation_view(data, models_to_plot: set[str]):
    """Render the "Computational Resources" view for the selected models.

    Shows a parallel-coordinates plot of the per-model mean resource usage
    and error metrics, followed by a scatter plot of a chosen metric against
    CPU usage (log-scaled x axis).

    Args:
        data: Run table with a ``model`` column, ``resource_usage.CPU`` and
            ``resource_usage.memory`` columns, and metric columns named
            ``"<metric>.<aggregation>"``.
        models_to_plot: Model identifiers to include; all other rows of
            ``data`` are ignored.
    """
    data_to_plot = data[data["model"].isin(models_to_plot)].sort_values(
        by="model", ascending=True
    )

    st.markdown("#### Computational Resources")
    # NOTE(review): "model" is a non-numeric column; Plotly Express appears to
    # drop non-continuous dimensions for parallel coordinates - confirm.
    fig = px.parallel_coordinates(
        data_to_plot.groupby("model").mean(numeric_only=True).reset_index(),
        dimensions=[
            "model",
            "resource_usage.CPU",
            "resource_usage.memory",
            "MAE.mean",
            "RMSE.mean",
            "MBE.mean",
            "rMAE.mean",
        ],
        color="rMAE.mean",
        color_continuous_scale=px.colors.diverging.Portland,
    )
    st.plotly_chart(fig, use_container_width=True)

    st.divider()

    left, center, right = st.columns(3, gap="small")
    with left:
        metric = st.selectbox("Metric", ["MAE", "RMSE", "MBE", "rMAE"], index=0)
    with center:
        aggregation_per_building = st.selectbox(
            "Aggregation per building", ["min", "mean", "median", "max", "std"], index=1
        )
    with right:
        aggregation_per_model = st.selectbox(
            "Aggregation per model", ["min", "mean", "median", "max", "std"], index=1
        )

    st.markdown(
        f"#### {aggregation_per_model.capitalize()} {aggregation_per_building.capitalize()} {metric} vs CPU usage"
    )
    # Collapsing all rows of a model into one row is the per-model
    # aggregation. The per-building choice selects which
    # "<metric>.<aggregation>" column is plotted. The two selections were
    # previously swapped, which went unnoticed because both default to "mean".
    aggregated_data = (
        data_to_plot.groupby("model")
        .agg(aggregation_per_model, numeric_only=True)
        .reset_index()
    )
    fig = px.scatter(
        aggregated_data,
        x="resource_usage.CPU",
        y=f"{metric}.{aggregation_per_building}",
        color="model",
        log_x=True,
    )
    fig.update_layout(height=600)
    st.plotly_chart(fig, use_container_width=True)
images/energyville_logo.png ADDED
images/ku_leuven_logo.png ADDED
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ wandb==0.17.0
2
+ plotly==5.20.0
utils.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import wandb
3
+
4
+
5
def get_wandb_data(entity: str, project: str, api_key: str, job_type: str) -> pd.DataFrame:
    """Fetch the runs of a W&B project into a single flat DataFrame.

    Args:
        entity: W&B entity (user or team) that owns the project.
        project: Name of the W&B project.
        api_key: API key used to authenticate against W&B.
        job_type: Only runs with this ``jobType`` are returned.

    Returns:
        One row per run, indexed by the run name. The columns are the
        flattened run summary (one nesting level) followed by the flattened
        run config (two nesting levels).
    """
    api = wandb.Api(api_key=api_key)

    # Projects are addressed as "<entity>/<project-name>".
    filter_dict = {"jobType": job_type}
    runs = api.runs(f"{entity}/{project}", filters=filter_dict)

    summary_list, config_list, name_list = [], [], []
    for run in runs:
        # .summary contains the output keys/values for metrics like accuracy.
        # We call ._json_dict to omit large files.
        summary_list.append(run.summary._json_dict)

        # .config contains the hyperparameters.
        # Special values whose key starts with "_" are removed. The filter
        # was previously missing although the comment described it.
        config_list.append(
            {k: v for k, v in run.config.items() if not k.startswith("_")}
        )

        # .name is the human-readable name of the run.
        name_list.append(run.name)

    # Nested dicts become dotted column names, e.g. "MAE.mean".
    summary_df = pd.json_normalize(summary_list, max_level=1)
    config_df = pd.json_normalize(config_list, max_level=2)
    runs_df = pd.concat([summary_df, config_df], axis=1)
    runs_df.index = name_list
    return runs_df