Commit ec3a730 by eduagarcia (1 parent: 92ec1df)

Enable clean initialization without any evaluated model

Files changed (3):
  1. src/display/utils.py  +8 -0
  2. src/populate.py  +1 -0
  3. src/tools/plots.py  +5 -2
src/display/utils.py CHANGED
@@ -85,6 +85,14 @@ baseline_row = {
     AutoEvalColumn.dummy.name: "baseline",
     AutoEvalColumn.model_type.name: "",
     AutoEvalColumn.flagged.name: False,
+    AutoEvalColumn.model_type_symbol.name: None,
+    AutoEvalColumn.architecture.name: None,
+    AutoEvalColumn.weight_type.name: None,
+    AutoEvalColumn.params.name: None,
+    AutoEvalColumn.likes.name: None,
+    AutoEvalColumn.license.name: None,
+    AutoEvalColumn.still_on_hub.name: None,
+    AutoEvalColumn.moe.name: None
 }
 
 # Average ⬆️ human baseline is 0.897 (source: averaging human baselines below)
src/populate.py CHANGED
@@ -17,6 +17,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, dynamic_path: str,
 
     df = pd.DataFrame.from_records(all_data_json)
     df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
+
     df = df[cols].round(decimals=2)
 
     # filter out if any of the benchmarks have not been produced
src/tools/plots.py CHANGED
@@ -17,7 +17,10 @@ def create_scores_df(raw_data: list[EvalResult]) -> pd.DataFrame:
     :return: A new DataFrame containing the maximum scores until each date for every metric.
     """
     # Step 1: Ensure 'date' is in datetime format and sort the DataFrame by it
-    results_df = pd.DataFrame(raw_data)
+
+    # create dataframe with EvalResult dataclass columns, even if raw_data is empty
+    results_df = pd.DataFrame(raw_data, columns=EvalResult.__dataclass_fields__.keys())
+
     #results_df["date"] = pd.to_datetime(results_df["date"], format="mixed", utc=True)
     results_df.sort_values(by="date", inplace=True)
 
@@ -49,7 +52,7 @@ def create_scores_df(raw_data: list[EvalResult]) -> pd.DataFrame:
         last_date = current_date
 
     # Step 4: Return all dictionaries as DataFrames
-    return {k: pd.DataFrame(v) for k, v in scores.items()}
+    return {k: pd.DataFrame(v, columns=["model", "date", "score"]) for k, v in scores.items()}
 
 
 def create_plot_df(scores_df: dict[str: pd.DataFrame]) -> pd.DataFrame:
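Both plots.py changes guard against an empty result set: passing an explicit columns= argument keeps the DataFrame's schema even when raw_data (or a score list) is empty, so the later sort_values and plotting steps do not fail. A short sketch of the difference, with "model"/"date"/"score" taken from the return statement above:

import pandas as pd

empty: list = []

# Without explicit columns, an empty DataFrame has no columns, so sorting by "date" raises KeyError
try:
    pd.DataFrame(empty).sort_values(by="date")
except KeyError as err:
    print("sort fails on empty data:", err)

# With explicit columns, the frame is empty but well-formed, and downstream code runs cleanly
df = pd.DataFrame(empty, columns=["model", "date", "score"])
print(df.sort_values(by="date"))  # prints an empty frame with the expected columns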