Commit dd0583d by meg-huggingface
1 Parent(s): c3d29b7
Removing logging bug

Files changed: src/leaderboard/read_evals.py (+11 -14)
src/leaderboard/read_evals.py  CHANGED

@@ -11,10 +11,7 @@ from src.display.formatting import make_clickable_model
 from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
 from src.submission.check_validity import is_model_on_hub
 
-from src.logging import setup_logger, log_file
-
 logging.basicConfig(level=logging.DEBUG)
-logger = setup_logger(__name__)
 
 
 @dataclass
@@ -75,13 +72,13 @@ class EvalResult:
         results = {}
         for task in Tasks:
             task = task.value
-
-
+            logging.info("Task: %s" % task.metric)
+            logging.info(data["results"].items())
             # We average all scores of a given metric (not all metrics are present in all files)
             # This looks a bit odd, should just be the one score in the one file. (?)
             scores = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k])
-
-
+            logging.info("scores are:")
+            logging.info(scores)
             if scores.size == 0 or any([score is None for score in scores]):
                 continue
 
@@ -114,7 +111,7 @@ class EvalResult:
             self.num_params = request.get("params", 0)
             self.date = request.get("submitted_time", "")
         except Exception:
-
+            logging.error(f"Could not find request file for {self.org}/{self.model}") #with precision {self.precision.value.name}")
 
     def to_dict(self):
         """Converts the Eval Result to a dict compatible with our dataframe display"""
@@ -166,8 +163,8 @@ def get_request_file_for_model(requests_path, model_name, precision):
 def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
     """From the path of the results folder root, extract all needed info for results"""
     model_result_filepaths = []
-
-
+    logging.debug('looking in results_path: %s' % results_path)
+    logging.debug('looking in requests_path: %s' % requests_path)
     for root, _, files in os.walk(results_path):
         # We should only have json files in model results
         if len(files) == 0 or any([not f.endswith(".json") for f in files]):
@@ -184,8 +181,8 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
 
     eval_results = {}
     for model_result_filepath in model_result_filepaths:
-
-
+        logging.debug("Examining filepath:")
+        logging.debug(model_result_filepath)
         # Creation of result
         eval_result = EvalResult.init_from_json_file(model_result_filepath)
         eval_result.update_with_request_file(requests_path)
@@ -196,8 +193,8 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
             eval_results[eval_name].results.update({k: v for k, v in eval_result.results.items() if v is not None})
         else:
             eval_results[eval_name] = eval_result
-
-
+        logging.info("eval results is")
+        logging.info(eval_results)
 
     results = []
     for v in eval_results.values():
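
For context on the pattern the commit moves to: once logging.basicConfig(level=logging.DEBUG) has run at import time, module-level logging.debug(...), logging.info(...), and logging.error(...) calls all go through the root logger, so no per-module setup_logger(__name__) handle is needed. Below is a minimal, self-contained sketch of that pattern (a standalone script with made-up paths, not the Space's actual module layout):

    import logging

    # Configure the root logger once, as read_evals.py now does at import time.
    logging.basicConfig(level=logging.DEBUG)

    def scan_results(results_path: str) -> None:
        # Module-level logging.* calls route to the root logger configured above.
        logging.debug("looking in results_path: %s", results_path)
        try:
            open(results_path)  # hypothetical failure to illustrate the except branch
        except Exception:
            logging.error("Could not find request file for %s", results_path)

    scan_results("eval-results/does-not-exist.json")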
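The second hunk's new logging.info calls are easiest to interpret with a toy payload. The sketch below uses made-up benchmark and metric names to stand in for task.benchmark and task.metric; the selection and the empty/None guard mirror the lines shown in the diff:

    import numpy as np

    # Hypothetical contents of one results JSON, keyed by benchmark name.
    data = {"results": {"toxigen": {"acc": 0.42}, "realtoxicityprompts": {"score": 0.17}}}
    benchmark, metric = "toxigen", "acc"  # stand-ins for task.benchmark / task.metric

    # Same selection as in the diff: take the metric value from the matching benchmark only.
    scores = np.array([v.get(metric, None) for k, v in data["results"].items() if k == benchmark])

    if scores.size == 0 or any(score is None for score in scores):
        print("skipping: metric not present for this benchmark")
    else:
        # "We average all scores of a given metric", per the comment in the diff.
        print("mean score:", np.mean(scores))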