Spaces:

EDS-lab
/

EnFoBench-PVGeneration

Runtime error

App Files Files Community

attila-balint-kul committed on Jun 21

Commit

dae6339

•

1 Parent(s): e7ac9ff

Upload 4 files

Browse files

Files changed (4) hide show

app.py +22 -23
components.py +300 -96
requirements.txt +2 -1
utils.py +18 -6

app.py CHANGED Viewed

@@ -1,35 +1,40 @@
 import streamlit as st
 from components import (
     buildings_view,
-    models_view,
-    performance_view,
     computation_view,
     logos,
     model_selector,
-    header,
     overview_view,
 )
-import utils
 PAGES = [
     "Overview",
     "Buildings",
     "Models",
-    "Performance",
     "Computational Resources",
 ]
-st.set_page_config(page_title="Pv Generation Dashboard", layout="wide")
 @st.cache_data(ttl=86400)
 def fetch_data():
     return utils.get_wandb_data(
-        st.secrets["wandb_entity"],
-        "enfobench-pv-generation",
-        st.secrets["wandb_api_key"],
         job_type="metrics",
     )
@@ -45,19 +50,11 @@ with st.sidebar:
     logos()
     view = st.selectbox("View", PAGES, index=0)
-    if view == "Performance" or view == "Computational Resources":
-        models_to_plot = model_selector(models)
     if view == "Overview":
-        st.header("Sources")
-        st.link_button("GitHub Repository", url="https://github.com/attila-balint-kul/energy-forecast-benchmark-toolkit", use_container_width=True)
-        st.link_button("Documentation", url="https://attila-balint-kul.github.io/energy-forecast-benchmark-toolkit/", use_container_width=True)
-        st.link_button("PV Generation Dataset", url="https://huggingface.co/datasets/EDS-lab/pv-generation", use_container_width=True)
-        st.link_button("HuggingFace Organization", url="https://huggingface.co/EDS-lab", use_container_width=True)
-        st.header("Other Dashboards")
-        st.link_button("Electricity Demand", url="https://huggingface.co/spaces/EDS-lab/EnFoBench-ElectricityDemand", use_container_width=True)
-        st.link_button("Gas Demand", url="https://huggingface.co/spaces/EDS-lab/EnFoBench-GasDemand", use_container_width=True)
     st.header("Refresh data")
     refresh = st.button(
@@ -68,7 +65,7 @@ with st.sidebar:
         st.rerun()
-header()
 if view == "Overview":
     overview_view(data)
@@ -76,8 +73,10 @@ elif view == "Buildings":
     buildings_view(data)
 elif view == "Models":
     models_view(data)
-elif view == "Performance":
-    performance_view(data, models_to_plot)
 elif view == "Computational Resources":
     computation_view(data, models_to_plot)
 else:

 import streamlit as st
+import utils
 from components import (
     buildings_view,
     computation_view,
+    header,
     logos,
     model_selector,
+    models_view,
     overview_view,
+    accuracy_view,
+    relative_performance_view,
+    links,
 )
+USE_CASE = st.secrets["enfobench_usecase"]
 PAGES = [
     "Overview",
     "Buildings",
     "Models",
+    "Accuracy",
+    "Relative Performance",
     "Computational Resources",
 ]
+st.set_page_config(page_title=f"{USE_CASE} Dashboard", layout="wide")
 @st.cache_data(ttl=86400)
 def fetch_data():
     return utils.get_wandb_data(
+        entity=st.secrets["wandb_entity"],
+        project=st.secrets["wandb_project"],
+        api_key=st.secrets["wandb_api_key"],
         job_type="metrics",
     )
     logos()
     view = st.selectbox("View", PAGES, index=0)
+    if view in ["Accuracy", "Relative Performance", "Computational Resources"]:
+        models_to_plot = model_selector(models, data)
     if view == "Overview":
+        links(current=USE_CASE)
     st.header("Refresh data")
     refresh = st.button(
         st.rerun()
+header(f"EnFoBench - {USE_CASE}")
 if view == "Overview":
     overview_view(data)
     buildings_view(data)
 elif view == "Models":
     models_view(data)
+elif view == "Accuracy":
+    accuracy_view(data, models_to_plot)
+elif view == "Relative Performance":
+    relative_performance_view(data, models_to_plot)
 elif view == "Computational Resources":
     computation_view(data, models_to_plot)
 else:

components.py CHANGED Viewed

@@ -1,12 +1,19 @@
 import pandas as pd
-import streamlit as st
 import plotly.express as px
-from utils import get_leaderboard
-def header() -> None:
-    st.title("EnFoBench - PV Generation")
     st.divider()
@@ -18,7 +25,51 @@ def logos() -> None:
         st.image("./images/energyville_logo.png")
-def model_selector(models: list[str]) -> set[str]:
     # Group models by their prefix
     model_groups: dict[str, list[str]] = {}
     for model in models:
@@ -30,6 +81,35 @@ def model_selector(models: list[str]) -> set[str]:
     models_to_plot = set()
     st.header("Models to include")
     left, right = st.columns(2)
     with left:
         select_none = st.button("Select None", use_container_width=True)
@@ -53,16 +133,7 @@ def model_selector(models: list[str]) -> set[str]:
     return models_to_plot
-def overview_view(data):
-    st.markdown("""
-    [EnFoBench](https://github.com/attila-balint-kul/energy-forecast-benchmark-toolkit)
-    is a community driven benchmarking framework for energy forecasting models.
-    This dashboard presents the results of the PV generation forecasting usecase. All models were cross-validated
-    on **365 days** of day ahead forecasting horizon *(10AM until midnight of the next day)*.
-    """)
-    st.divider()
     st.markdown("## Leaderboard")
     leaderboard = get_leaderboard(data, ["MAE.mean", "RMSE.mean", "rMAE.mean"])
@@ -76,7 +147,10 @@ def overview_view(data):
         )
         fig = px.bar(best_models_mae, x="MAE.mean", y=best_models_mae.index)
         fig.update_layout(
-            title="Top 10 models by MAE", xaxis_title="", yaxis_title="Model"
         )
         st.plotly_chart(fig, use_container_width=True)
@@ -87,7 +161,9 @@ def overview_view(data):
             .sort_values("RMSE.mean")
         )
         fig = px.bar(best_models_mae, x="RMSE.mean", y=best_models_mae.index)
-        fig.update_layout(title="Top 10 models by RMSE", xaxis_title="", yaxis_title="")
         st.plotly_chart(fig, use_container_width=True)
     with right:
@@ -97,41 +173,109 @@ def overview_view(data):
             .sort_values("rMAE.mean")
         )
         fig = px.bar(best_models_mae, x="rMAE.mean", y=best_models_mae.index)
-        fig.update_layout(title="Top 10 models by rMAE", xaxis_title="", yaxis_title="")
         st.plotly_chart(fig, use_container_width=True)
     st.dataframe(leaderboard, use_container_width=True)
-def buildings_view(data):
     buildings = (
         data[
             [
                 "unique_id",
                 "metadata.location_id",
                 "metadata.timezone",
                 "dataset.available_history.days",
-                "metadata.ac_capacity",
             ]
         ]
         .groupby("unique_id")
         .first()
         .rename(
             columns={
                 "metadata.location_id": "Location ID",
                 "metadata.timezone": "Timezone",
                 "dataset.available_history.days": "Available history (days)",
-                "metadata.ac_capacity": "Capacity (kW)",
             }
         )
     )
-    st.metric("Number of buildings", len(buildings))
     st.divider()
-    st.markdown("### Buildings")
     st.dataframe(
-        buildings,
         use_container_width=True,
         column_config={
             "Available history (days)": st.column_config.ProgressColumn(
@@ -141,18 +285,18 @@ def buildings_view(data):
                 min_value=0,
                 max_value=float(buildings["Available history (days)"].max()),
             ),
-            "Capacity (kW)": st.column_config.ProgressColumn(
-                "Capacity (kW)",
                 help="Available training data during the first prediction.",
                 format="%f",
                 min_value=0,
-                max_value=float(buildings["Capacity (kW)"].max()),
             ),
         },
     )
-def models_view(data):
     models = (
         data[
             [
@@ -181,12 +325,21 @@ def models_view(data):
         )
     )
-    st.metric("Number of models", len(models))
     st.divider()
-    st.markdown("### Models")
-    st.dataframe(models, use_container_width=True)
     left, right = st.columns(2, gap="large")
     with left:
         st.markdown("#### Variate types")
@@ -208,8 +361,12 @@ def models_view(data):
         )
         st.plotly_chart(fig, use_container_width=True)
-def performance_view(data: pd.DataFrame, models_to_plot: set[str]):
     data_to_plot = data[data["model"].isin(models_to_plot)].sort_values(
         by="model", ascending=True
     )
@@ -223,24 +380,20 @@ def performance_view(data: pd.DataFrame, models_to_plot: set[str]):
         )
     st.markdown(f"#### {aggregation.capitalize()} {metric} per building")
-    rank_df = (
-        data_to_plot.groupby(["model"])
-        .agg("median", numeric_only=True)
-        .sort_values(by=f"{metric}.{aggregation}")
-        .reset_index()
-        .rename_axis("rank")
-        .reset_index()[["rank", "model"]]
-    )
-    fig = px.box(
-        data_to_plot.merge(rank_df, on="model").sort_values(by="rank"),
-        x=f"{metric}.{aggregation}",
-        y="model",
-        color="model",
-        points="all",
-    )
-    fig.update_layout(showlegend=False, height=40 * len(models_to_plot))
-    st.plotly_chart(fig, use_container_width=True)
     st.divider()
@@ -269,14 +422,17 @@ def performance_view(data: pd.DataFrame, models_to_plot: set[str]):
     st.markdown(
         f"#### {x_aggregation.capitalize()} {x_metric} vs {y_aggregation.capitalize()} {y_metric}"
     )
-    fig = px.scatter(
-        data_to_plot,
-        x=f"{x_metric}.{x_aggregation}",
-        y=f"{y_metric}.{y_aggregation}",
-        color="model",
-    )
-    fig.update_layout(height=600)
-    st.plotly_chart(fig, use_container_width=True)
     st.divider()
@@ -301,7 +457,7 @@ def performance_view(data: pd.DataFrame, models_to_plot: set[str]):
             f"{metric}.max",
             f"{metric}.std",
         ]
-    ]
     def custom_table(styler):
         styler.background_gradient(cmap="seismic", axis=0)
@@ -321,15 +477,12 @@ def performance_view(data: pd.DataFrame, models_to_plot: set[str]):
         .reset_index()
         .pivot(index="model", columns="unique_id", values=f"{metric}.{aggregation}")
     )
-    metrics_per_building_table.insert(
-        0, "median", metrics_per_building_table.median(axis=1)
-    )
     metrics_per_building_table.insert(
         0, "mean", metrics_per_building_table.mean(axis=1)
     )
-    metrics_per_building_table = metrics_per_building_table.sort_values(by="mean")
-    def custom_table(styler):
         styler.background_gradient(cmap="seismic", axis=None)
         styler.format(precision=2)
@@ -342,29 +495,54 @@ def performance_view(data: pd.DataFrame, models_to_plot: set[str]):
     st.dataframe(styled_table, use_container_width=True)
-def computation_view(data, models_to_plot: set[str]):
     data_to_plot = data[data["model"].isin(models_to_plot)].sort_values(
         by="model", ascending=True
     )
-    st.markdown("#### Computational Resources")
-    fig = px.parallel_coordinates(
-        data_to_plot.groupby("model").mean(numeric_only=True).reset_index(),
-        dimensions=[
-            "model",
-            "resource_usage.CPU",
-            "resource_usage.memory",
-            "MAE.mean",
-            "RMSE.mean",
-            "MBE.mean",
-            "rMAE.mean",
-        ],
-        color="rMAE.mean",
-        color_continuous_scale=px.colors.diverging.Portland,
     )
-    st.plotly_chart(fig, use_container_width=True)
-    st.divider()
     left, center, right = st.columns(3, gap="small")
     with left:
@@ -381,17 +559,43 @@ def computation_view(data, models_to_plot: set[str]):
     st.markdown(
         f"#### {aggregation_per_model.capitalize()} {aggregation_per_building.capitalize()} {metric} vs CPU usage"
     )
-    aggregated_data = (
-        data_to_plot.groupby("model")
-        .agg(aggregation_per_building, numeric_only=True)
-        .reset_index()
-    )
-    fig = px.scatter(
-        aggregated_data,
-        x="resource_usage.CPU",
-        y=f"{metric}.{aggregation_per_model}",
-        color="model",
-        log_x=True,
-    )
-    fig.update_layout(height=600)
-    st.plotly_chart(fig, use_container_width=True)

 import pandas as pd
 import plotly.express as px
+import streamlit as st
+from pandas.io.formats.style import Styler
+from utils import get_leaderboard, get_model_ranks
def header(title: str) -> None:
    """Render the page title, a short EnFoBench introduction and a divider.

    Args:
        title: Text shown as the page title.
    """
    # Same markdown text as before, spelled out line by line (the leading
    # spaces are part of the original literal and are kept as-is).
    intro = (
        "\n"
        "    [EnFoBench](https://github.com/attila-balint-kul/energy-forecast-benchmark-toolkit)\n"
        "    is a community driven benchmarking framework for energy forecasting models.\n"
        "    "
    )
    st.title(title)
    st.markdown(intro)
    st.divider()
         st.image("./images/energyville_logo.png")
def links(current: str) -> None:
    """Render the "Sources" and "Other Dashboards" link buttons.

    Args:
        current: Identifier of the use case this dashboard shows
            (compared against "ElectricityDemand", "GasDemand" and
            "PVGeneration"). It selects the dataset link and hides the
            button that would point back to the current dashboard.
    """
    # Dataset button per use case. Previously every dashboard linked the
    # electricity demand dataset, even the PV generation one.
    # NOTE(review): no dataset URL for "GasDemand" is known here, so it (and
    # any unknown use case) falls back to the previous behaviour - confirm.
    dataset_links = {
        "ElectricityDemand": (
            "Electricity Demand Dataset",
            "https://huggingface.co/datasets/EDS-lab/electricity-demand",
        ),
        "PVGeneration": (
            "PV Generation Dataset",
            "https://huggingface.co/datasets/EDS-lab/pv-generation",
        ),
    }
    dataset_label, dataset_url = dataset_links.get(
        current, dataset_links["ElectricityDemand"]
    )

    sources = (
        (
            "GitHub Repository",
            "https://github.com/attila-balint-kul/energy-forecast-benchmark-toolkit",
        ),
        (
            "Documentation",
            "https://attila-balint-kul.github.io/energy-forecast-benchmark-toolkit/",
        ),
        (dataset_label, dataset_url),
        ("HuggingFace Organization", "https://huggingface.co/EDS-lab"),
    )
    # (use case id, button label, dashboard url)
    dashboards = (
        (
            "ElectricityDemand",
            "Electricity Demand",
            "https://huggingface.co/spaces/EDS-lab/EnFoBench-ElectricityDemand",
        ),
        (
            "GasDemand",
            "Gas Demand",
            "https://huggingface.co/spaces/EDS-lab/EnFoBench-GasDemand",
        ),
        (
            "PVGeneration",
            "PVGeneration",
            "https://huggingface.co/spaces/EDS-lab/EnFoBench-PVGeneration",
        ),
    )

    st.header("Sources")
    for label, url in sources:
        st.link_button(label, url=url, use_container_width=True)

    st.header("Other Dashboards")
    for use_case, label, url in dashboards:
        # Skip the dashboard the user is already looking at.
        if use_case != current:
            st.link_button(label, url=url, use_container_width=True)
+def model_selector(models: list[str], data: pd.DataFrame) -> set[str]:
     # Group models by their prefix
     model_groups: dict[str, list[str]] = {}
     for model in models:
     models_to_plot = set()
     st.header("Models to include")
+    left, middle, right = st.columns(3)
+    with left:
+        best_by_mae = st.button("Best by MAE", use_container_width=True)
+        if best_by_mae:
+            best_models_by_mae = get_model_ranks(data, "MAE.mean").head(10).model.tolist()
+            for model in models:
+                if model in best_models_by_mae:
+                    st.session_state[model] = True
+                else:
+                    st.session_state[model] = False
+    with middle:
+        best_by_rmse = st.button("Best by RMSE", use_container_width=True)
+        if best_by_rmse:
+            best_models_by_rmse = get_model_ranks(data, "RMSE.mean").head(10).model.tolist()
+            for model in models:
+                if model in best_models_by_rmse:
+                    st.session_state[model] = True
+                else:
+                    st.session_state[model] = False
+    with right:
+        best_by_rmae = st.button("Best by rMAE", use_container_width=True)
+        if best_by_rmae:
+            best_models_by_rmae = get_model_ranks(data, "rMAE.mean").head(10).model.tolist()
+            for model in models:
+                if model in best_models_by_rmae:
+                    st.session_state[model] = True
+                else:
+                    st.session_state[model] = False
     left, right = st.columns(2)
     with left:
         select_none = st.button("Select None", use_container_width=True)
     return models_to_plot
+def overview_view(data: pd.DataFrame):
     st.markdown("## Leaderboard")
     leaderboard = get_leaderboard(data, ["MAE.mean", "RMSE.mean", "rMAE.mean"])
         )
         fig = px.bar(best_models_mae, x="MAE.mean", y=best_models_mae.index)
         fig.update_layout(
+            title="Top 10 models by MAE",
+            xaxis_title="",
+            yaxis_title="Model",
+            height=600,
         )
         st.plotly_chart(fig, use_container_width=True)
             .sort_values("RMSE.mean")
         )
         fig = px.bar(best_models_mae, x="RMSE.mean", y=best_models_mae.index)
+        fig.update_layout(
+            title="Top 10 models by RMSE", xaxis_title="", yaxis_title="", height=600
+        )
         st.plotly_chart(fig, use_container_width=True)
     with right:
             .sort_values("rMAE.mean")
         )
         fig = px.bar(best_models_mae, x="rMAE.mean", y=best_models_mae.index)
+        fig.update_layout(
+            title="Top 10 models by rMAE", xaxis_title="", yaxis_title="", height=600
+        )
         st.plotly_chart(fig, use_container_width=True)
     st.dataframe(leaderboard, use_container_width=True)
+def buildings_view(data: pd.DataFrame):
+    if 'metadata.cluster_size' not in data.columns:
+        data['metadata.cluster_size'] = 1
+    if 'metadata.building_class' not in data.columns:
+        data['metadata.building_class'] = "Unknown"
     buildings = (
         data[
             [
                 "unique_id",
+                "metadata.cluster_size",
+                "metadata.building_class",
                 "metadata.location_id",
                 "metadata.timezone",
                 "dataset.available_history.days",
+                "dataset.available_history.observations",
+                "metadata.freq",
             ]
         ]
         .groupby("unique_id")
         .first()
         .rename(
             columns={
+                "metadata.cluster_size": "Cluster size",
+                "metadata.building_class": "Building class",
                 "metadata.location_id": "Location ID",
                 "metadata.timezone": "Timezone",
                 "dataset.available_history.days": "Available history (days)",
+                "dataset.available_history.observations": "Available history (#)",
+                "metadata.freq": "Frequency",
             }
         )
     )
+    left, middle, right = st.columns(3)
+    with left:
+        st.metric("Number of buildings", data["unique_id"].nunique())
+    with middle:
+        st.metric(
+            "Residential",
+            data[data["metadata.building_class"] == "Residential"][
+                "unique_id"
+            ].nunique(),
+        )
+    with right:
+        st.metric(
+            "Commercial",
+            data[data["metadata.building_class"] == "Commercial"][
+                "unique_id"
+            ].nunique(),
+        )
     st.divider()
+    left, middle, right = st.columns(3, gap="large")
+    with left:
+        st.markdown("#### Building classes")
+        fig = px.pie(
+            buildings.groupby("Building class").size().reset_index(),
+            values=0,
+            names="Building class",
+        )
+        fig.update_layout(
+            legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
+        )
+        st.plotly_chart(fig, use_container_width=True)
+    with middle:
+        st.markdown("#### Timezones")
+        fig = px.pie(
+            buildings.groupby("Timezone").size().reset_index(),
+            values=0,
+            names="Timezone",
+        )
+        fig.update_layout(
+            legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
+        )
+        st.plotly_chart(fig, use_container_width=True)
+    with right:
+        st.markdown("#### Frequencies")
+        fig = px.pie(
+            buildings.groupby("Frequency").size().reset_index(),
+            values=0,
+            names="Frequency",
+        )
+        fig.update_layout(
+            legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
+        )
+        st.plotly_chart(fig, use_container_width=True)
+    st.divider()
+    st.markdown("#### Buildings")
     st.dataframe(
+        buildings.sort_values("Available history (days)"),
         use_container_width=True,
         column_config={
             "Available history (days)": st.column_config.ProgressColumn(
                 min_value=0,
                 max_value=float(buildings["Available history (days)"].max()),
             ),
+            "Available history (#)": st.column_config.ProgressColumn(
+                "Available history (#)",
                 help="Available training data during the first prediction.",
                 format="%f",
                 min_value=0,
+                max_value=float(buildings["Available history (#)"].max()),
             ),
         },
     )
+def models_view(data: pd.DataFrame):
     models = (
         data[
             [
         )
     )
+    left, middle, right = st.columns(3)
+    with left:
+        st.metric("Models", len(models))
+    with middle:
+        st.metric(
+            "Univariate",
+            data[data["model_info.variate_type"] == "univariate"]["model"].nunique(),
+        )
+    with right:
+        st.metric(
+            "Univariate",
+            data[data["model_info.variate_type"] == "multivariate"]["model"].nunique(),
+        )
     st.divider()
     left, right = st.columns(2, gap="large")
     with left:
         st.markdown("#### Variate types")
         )
         st.plotly_chart(fig, use_container_width=True)
+    st.divider()
+    st.markdown("### Models")
+    st.dataframe(models, use_container_width=True)
+def accuracy_view(data: pd.DataFrame, models_to_plot: set[str]):
     data_to_plot = data[data["model"].isin(models_to_plot)].sort_values(
         by="model", ascending=True
     )
         )
     st.markdown(f"#### {aggregation.capitalize()} {metric} per building")
+    if data_to_plot.empty:
+        st.warning("No data to display.")
+    else:
+        model_ranks = get_model_ranks(data_to_plot, f"{metric}.{aggregation}")
+        fig = px.box(
+            data_to_plot.merge(model_ranks, on="model").sort_values(by="rank"),
+            x=f"{metric}.{aggregation}",
+            y="model",
+            color="model",
+            points="all",
+        )
+        fig.update_layout(showlegend=False, height=50 * len(models_to_plot))
+        st.plotly_chart(fig, use_container_width=True)
     st.divider()
     st.markdown(
         f"#### {x_aggregation.capitalize()} {x_metric} vs {y_aggregation.capitalize()} {y_metric}"
     )
+    if data_to_plot.empty:
+        st.warning("No data to display.")
+    else:
+        fig = px.scatter(
+            data_to_plot,
+            x=f"{x_metric}.{x_aggregation}",
+            y=f"{y_metric}.{y_aggregation}",
+            color="model",
+        )
+        fig.update_layout(height=600)
+        st.plotly_chart(fig, use_container_width=True)
     st.divider()
             f"{metric}.max",
             f"{metric}.std",
         ]
+    ].sort_values(by=f"{metric}.mean")
     def custom_table(styler):
         styler.background_gradient(cmap="seismic", axis=0)
         .reset_index()
         .pivot(index="model", columns="unique_id", values=f"{metric}.{aggregation}")
     )
     metrics_per_building_table.insert(
         0, "mean", metrics_per_building_table.mean(axis=1)
     )
+    metrics_per_building_table = metrics_per_building_table.sort_values(by="mean").drop(columns="mean")
+    def custom_table(styler: Styler):
         styler.background_gradient(cmap="seismic", axis=None)
         styler.format(precision=2)
     st.dataframe(styled_table, use_container_width=True)
def relative_performance_view(data: pd.DataFrame, models_to_plot: set[str]):
    """Render a box plot of how often each selected model beats a baseline.

    Args:
        data: Runs frame with a ``model`` column and one
            ``better_than.<baseline>`` column per baseline. Each cell is
            normalised with ``pd.json_normalize`` and its ``percentage``
            field is read - presumably a 0-1 fraction, since it is scaled
            by 100 before plotting (TODO confirm).
        models_to_plot: Names of the models to include.
    """
    data_to_plot = data[data["model"].isin(models_to_plot)].sort_values(
        by="model", ascending=True
    )

    st.markdown("#### Relative performance")
    if data_to_plot.empty:
        st.warning("No data to display.")
        return

    baseline_choices = sorted(
        data.filter(like="better_than")
        .columns.str.removeprefix("better_than.")
        .tolist()
    )
    # Without any baseline column there is nothing to compare against;
    # indexing the empty list used to raise IndexError here.
    if not baseline_choices:
        st.warning("No baseline comparison available.")
        return

    # Only ask the user when there is an actual choice to make.
    if len(baseline_choices) > 1:
        better_than_baseline = st.selectbox("Baseline model", options=baseline_choices)
    else:
        better_than_baseline = baseline_choices[0]

    percentage_column = f"better_than.{better_than_baseline}.percentage"
    data_to_plot.loc[:, percentage_column] = (
        pd.json_normalize(data_to_plot[f"better_than.{better_than_baseline}"])[
            "percentage"
        ].values
        * 100
    )

    model_rank = get_model_ranks(data_to_plot, percentage_column)
    fig = px.box(
        # Join explicitly on "model" so an unrelated shared column can never
        # silently become part of the merge key.
        data_to_plot.merge(model_rank, on="model").sort_values(by="rank"),
        x=percentage_column,
        y="model",
        points="all",
    )
    fig.update_xaxes(range=[0, 100], title_text="Better than baseline (%)")
    fig.update_layout(
        showlegend=False,
        height=50 * len(models_to_plot),
        title=f"Better than {better_than_baseline} on % of days per building",
    )
    st.plotly_chart(fig, use_container_width=True)
+def computation_view(data: pd.DataFrame, models_to_plot: set[str]):
+    data_to_plot = data[data["model"].isin(models_to_plot)].sort_values(
+        by="model", ascending=True
     )
+    data_to_plot["resource_usage.CPU"] /= 3600
+    st.markdown("#### Computational Resources")
     left, center, right = st.columns(3, gap="small")
     with left:
     st.markdown(
         f"#### {aggregation_per_model.capitalize()} {aggregation_per_building.capitalize()} {metric} vs CPU usage"
     )
+    if data_to_plot.empty:
+        st.warning("No data to display.")
+    else:
+        aggregated_data = (
+            data_to_plot.groupby("model")
+            .agg(aggregation_per_building, numeric_only=True)
+            .reset_index()
+        )
+        fig = px.scatter(
+            aggregated_data,
+            x="resource_usage.CPU",
+            y=f"{metric}.{aggregation_per_model}",
+            color="model",
+            log_x=True,
+        )
+        fig.update_layout(height=600)
+        fig.update_xaxes(title_text="CPU usage (hours)")
+        fig.update_yaxes(
+            title_text=f"{metric} ({aggregation_per_building}, {aggregation_per_model})"
+        )
+        st.plotly_chart(fig, use_container_width=True)
+    st.divider()
+    st.markdown("#### Computational time vs historical data")
+    if data_to_plot.empty:
+        st.warning("No data to display.")
+    else:
+        fig = px.scatter(
+            data_to_plot,
+            x="dataset.available_history.observations",
+            y="resource_usage.CPU",
+            color="model",
+            trendline="ols",
+            hover_data=["model", "unique_id"],
+        )
+        fig.update_layout(height=600)
+        fig.update_xaxes(title_text="Available historical observations (#)")
+        fig.update_yaxes(title_text="CPU usage (hours)")
+        st.plotly_chart(fig, use_container_width=True)

requirements.txt CHANGED Viewed

@@ -1,2 +1,3 @@
 wandb==0.17.0
-plotly==5.20.0

 wandb==0.17.0
+plotly==5.20.0
+statsmodels==0.14.2

utils.py CHANGED Viewed

@@ -2,7 +2,9 @@ import pandas as pd
 import wandb
-def get_wandb_data(entity: str, project: str, api_key: str, job_type: str) -> pd.DataFrame:
     api = wandb.Api(api_key=api_key)
     # Project is specified by <entity/project-name>
@@ -17,7 +19,7 @@ def get_wandb_data(entity: str, project: str, api_key: str, job_type: str) -> pd
         # .config contains the hyperparameters.
         #  We remove special values that start with _.
-        config_list.append({k: v for k, v in run.config.items()})
         # .name is the human-readable name of the run.
         name_list.append(run.name)
@@ -30,10 +32,9 @@ def get_wandb_data(entity: str, project: str, api_key: str, job_type: str) -> pd
 def get_leaderboard(runs_df: pd.DataFrame, metrics: list[str]) -> pd.DataFrame:
-    leaderboard = pd.DataFrame(
-        index=runs_df['model'].unique(),
-        columns=metrics
-    ).fillna(0)
     for _, building_df in runs_df.groupby("unique_id"):
         for column in leaderboard.columns:
@@ -42,3 +43,14 @@ def get_leaderboard(runs_df: pd.DataFrame, metrics: list[str]) -> pd.DataFrame:
     leaderboard = leaderboard.sort_values(by=list(leaderboard.columns), ascending=False)
     return leaderboard

 import wandb
+def get_wandb_data(
+    entity: str, project: str, api_key: str, job_type: str
+) -> pd.DataFrame:
     api = wandb.Api(api_key=api_key)
     # Project is specified by <entity/project-name>
         # .config contains the hyperparameters.
         #  We remove special values that start with _.
+        config_list.append(run.config)
         # .name is the human-readable name of the run.
         name_list.append(run.name)
 def get_leaderboard(runs_df: pd.DataFrame, metrics: list[str]) -> pd.DataFrame:
+    leaderboard = pd.DataFrame(index=runs_df["model"].unique(), columns=metrics).fillna(
+        0
+    )
     for _, building_df in runs_df.groupby("unique_id"):
         for column in leaderboard.columns:
     leaderboard = leaderboard.sort_values(by=list(leaderboard.columns), ascending=False)
     return leaderboard
def get_model_ranks(runs_df: pd.DataFrame, metric: str) -> pd.DataFrame:
    """Rank models by their median value of ``metric``, lowest first.

    Args:
        runs_df: Frame with a ``model`` column and a numeric ``metric`` column.
        metric: Name of the column whose per-model median decides the order.

    Returns:
        Frame with the columns ``rank`` (0-based position) and ``model``,
        ordered by rank.

    Raises:
        KeyError: If ``metric`` is not a column of ``runs_df`` or is not numeric.
    """
    medians = (
        # Aggregate only the requested column instead of every numeric one.
        runs_df.groupby("model")[[metric]]
        .median(numeric_only=True)
        # A stable sort keeps tied models in group-key order, so ranks are
        # reproducible between runs.
        .sort_values(by=metric, kind="stable")
    )
    # The positional index of the sorted frame becomes the "rank" column.
    ranks = medians.reset_index().rename_axis("rank").reset_index()
    return ranks[["rank", "model"]]