import pandas as pd
import wandb


def get_wandb_data(entity: str, project: str, api_key: str, job_type: str) -> pd.DataFrame:
    api = wandb.Api(api_key=api_key)

    # Projects are specified by <entity/project-name>; filter runs by job type.
    filter_dict = {"jobType": job_type}
    runs = api.runs(f"{entity}/{project}", filters=filter_dict)

    summary_list, config_list, name_list = [], [], []
    for run in runs:
        # .summary contains the output keys/values for metrics like accuracy.
        # We use ._json_dict to omit large files.
        summary_list.append(run.summary._json_dict)

        # .config contains the hyperparameters.
        # We remove special values that start with _.
        config_list.append({k: v for k, v in run.config.items() if not k.startswith("_")})

        # .name is the human-readable name of the run.
        name_list.append(run.name)

    summary_df = pd.json_normalize(summary_list, max_level=1)
    config_df = pd.json_normalize(config_list, max_level=2)
    runs_df = pd.concat([summary_df, config_df], axis=1)
    runs_df.index = name_list
    return runs_df


def get_leaderboard(runs_df: pd.DataFrame, metrics: list[str]) -> pd.DataFrame:
    # One row per model, one column per metric; each cell counts how many
    # unique_ids (buildings) that model won for that metric.
    leaderboard = pd.DataFrame(
        index=runs_df["model"].unique(),
        columns=metrics,
    ).fillna(0)

    for _, building_df in runs_df.groupby("unique_id"):
        for column in leaderboard.columns:
            # The best model for a building/metric is the one with the lowest score.
            best_model = building_df.loc[building_df[column].idxmin()].model
            leaderboard.loc[best_model, column] += 1

    leaderboard = leaderboard.sort_values(by=list(leaderboard.columns), ascending=False)
    return leaderboard
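

# A minimal usage sketch (not part of the original module): the entity/project
# names, the WANDB_API_KEY environment variable, the job type, and the metric
# column names ("MAE", "RMSE") below are illustrative assumptions, not values
# taken from the actual Space configuration.
if __name__ == "__main__":
    import os

    runs_df = get_wandb_data(
        entity="my-entity",  # hypothetical W&B entity
        project="my-project",  # hypothetical W&B project
        api_key=os.environ["WANDB_API_KEY"],  # assumed to be set in the environment
        job_type="forecasting",  # hypothetical job type filter
    )
    leaderboard = get_leaderboard(runs_df, metrics=["MAE", "RMSE"])
    print(leaderboard)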