import json
import pickle

import pandas as pd


def main(
    model_info_file: str,
    elo_rating_pkl: str,
    output_csv: str,
) -> None:
    """Build the leaderboard CSV from model metadata and per-dimension Elo results.

    For every dimension in the pickled results, the rating of each model on
    that dimension's anonymous-battle leaderboard is copied into a
    ``"<dim> Elo rating"`` column. The ``"Arena Elo rating"`` column is the
    sum of a model's per-dimension ratings divided by the number of
    dimensions in the pickle (a dimension in which the model does not appear
    contributes nothing to the sum). Rows are sorted by that column,
    highest first.

    Args:
        model_info_file: Path to a JSON file mapping model name to a metadata
            dict (the code reads the ``"Link"`` and ``"Organization"`` keys).
            Models that appear in no leaderboard are left out of the CSV.
        elo_rating_pkl: Path to a pickle holding
            ``{dim: {"anony": {"leaderboard_table_df": DataFrame}}}`` where
            each DataFrame is indexed by model name and has a ``"rating"``
            column.
        output_csv: Destination path of the CSV file.

    Raises:
        ValueError: If the pickle contains no dimensions.
    """
    with open(model_info_file, encoding="utf-8") as fin:
        model_ratings = json.load(fin)

    # SECURITY NOTE: unpickling can execute arbitrary code; only point this
    # script at pickle files produced by a trusted pipeline.
    with open(elo_rating_pkl, "rb") as fin:
        elo_rating_results = pickle.load(fin)

    num_dims = len(elo_rating_results)
    if num_dims == 0:
        raise ValueError(f"No Elo rating dimensions found in {elo_rating_pkl}")

    # Output columns: key, Model, one rating per dimension, Arena Elo rating,
    # Link, Organization.
    fields = ["key", "Model"]
    for dim, dim_results in elo_rating_results.items():
        dim_field = f"{dim} Elo rating"
        fields.append(dim_field)
        leaderboard = dim_results["anony"]["leaderboard_table_df"]

        for model in leaderboard.index.tolist():
            # Models missing from the metadata file get placeholder
            # organization/link values.
            entry = model_ratings.setdefault(
                model, {"Organization": "N/A", "Link": "N/A"}
            )
            # "Model" doubles as the marker for "appeared in a leaderboard".
            entry["Model"] = model
            entry["key"] = model

            rating = leaderboard.loc[model, "rating"]
            entry[dim_field] = rating
            entry["Arena Elo rating"] = entry.get("Arena Elo rating", 0) + rating

    fields += ["Arena Elo rating", "Link", "Organization"]

    print(model_ratings)

    exclude_keys = ("starting_from",)
    final_model_info = {}
    for model, entry in model_ratings.items():
        if "Model" not in entry:
            # Present in the metadata file but never rated: skip.
            continue
        # Turn the accumulated sum into an average over all dimensions
        # (previously a hard-coded 5).
        entry["Arena Elo rating"] /= num_dims
        for key in exclude_keys:
            entry.pop(key, None)
        final_model_info[model] = entry

    df = pd.DataFrame(final_model_info).T
    df = df[fields]
    # Sort by the averaged anonymous-battle rating, best model first.
    df = df.sort_values(by=["Arena Elo rating"], ascending=False)
    df.to_csv(output_csv, index=False)
    print("Leaderboard data saved to", output_csv)
    print(df)


if __name__ == "__main__":
    # Imported here so that `main` can be imported (e.g. by tests) without
    # requiring the CLI dependency.
    import fire

    fire.Fire(main)