Clémentine committed on
Commit
90fa47e
1 Parent(s): 3df8919

Incorrectly tagged merges are now flagged

Browse files
src/leaderboard/filter_models.py CHANGED
@@ -40,6 +40,7 @@ FLAGGED_MODELS = {
40
  "rwitz2/pee": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
41
  "zyh3826 / GML-Mistral-merged-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/503",
42
  "dillfrescott/trinity-medium": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
 
43
  }
44
 
45
  # Models which have been requested by orgs to not be submitted on the leaderboard
@@ -53,10 +54,16 @@ DO_NOT_SUBMIT_MODELS = [
53
 
54
  def flag_models(leaderboard_data: list[dict]):
55
  for model_data in leaderboard_data:
56
- if model_data["model_name_for_query"] in FLAGGED_MODELS:
57
- issue_num = FLAGGED_MODELS[model_data["model_name_for_query"]].split("/")[-1]
 
 
 
 
 
 
58
  issue_link = model_hyperlink(
59
- FLAGGED_MODELS[model_data["model_name_for_query"]],
60
  f"See discussion #{issue_num}",
61
  )
62
  model_data[
 
40
  "rwitz2/pee": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
41
  "zyh3826 / GML-Mistral-merged-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/503",
42
  "dillfrescott/trinity-medium": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
43
+ "merged": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
44
  }
45
 
46
  # Models which have been requested by orgs to not be submitted on the leaderboard
 
54
 
55
  def flag_models(leaderboard_data: list[dict]):
56
  for model_data in leaderboard_data:
57
+ # Merges are flagged automatically
58
+ if model_data[AutoEvalColumn.flagged.name] == True:
59
+ flag_key = "merged"
60
+ else:
61
+ flag_key = model_data["model_name_for_query"]
62
+
63
+ if flag_key in FLAGGED_MODELS:
64
+ issue_num = FLAGGED_MODELS[flag_key].split("/")[-1]
65
  issue_link = model_hyperlink(
66
+ FLAGGED_MODELS[flag_key],
67
  f"See discussion #{issue_num}",
68
  )
69
  model_data[
src/leaderboard/read_evals.py CHANGED
@@ -11,7 +11,7 @@ from huggingface_hub import ModelCard
11
 
12
  from src.display.formatting import make_clickable_model
13
  from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
14
- from src.submission.check_validity import is_model_on_hub
15
 
16
 
17
  @dataclass
@@ -32,7 +32,8 @@ class EvalResult:
32
  num_params: int = 0
33
  date: str = "" # submission date of request file
34
  still_on_hub: bool = False
35
- merge: bool = False
 
36
 
37
  @classmethod
38
  def init_from_json_file(self, json_filepath):
@@ -60,11 +61,6 @@ class EvalResult:
60
  result_key = f"{org}_{model}_{precision.value.name}"
61
  full_model = "/".join(org_and_model)
62
 
63
- try:
64
- merge = any(t in ["merge", "mergedlm"] for t in ModelCard.load(full_model).data.tags)
65
- except Exception:
66
- merge = False
67
-
68
  still_on_hub, error, model_config = is_model_on_hub(
69
  full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
70
  )
@@ -74,6 +70,28 @@ class EvalResult:
74
  if architectures:
75
  architecture = ";".join(architectures)
76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  # Extract results available in this file (some results are split in several files)
78
  results = {}
79
  for task in Tasks:
@@ -112,7 +130,8 @@ class EvalResult:
112
  revision= config.get("model_sha", ""),
113
  still_on_hub=still_on_hub,
114
  architecture=architecture,
115
- merge=merge
 
116
  )
117
 
118
  def update_with_request_file(self, requests_path):
@@ -138,8 +157,8 @@ class EvalResult:
138
  "eval_name": self.eval_name, # not a column, just a save name,
139
  AutoEvalColumn.precision.name: self.precision.value.name,
140
  AutoEvalColumn.model_type.name: self.model_type.value.name,
141
- AutoEvalColumn.merged.name: self.merge,
142
- AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
143
  AutoEvalColumn.weight_type.name: self.weight_type.value.name,
144
  AutoEvalColumn.architecture.name: self.architecture,
145
  AutoEvalColumn.model.name: make_clickable_model(self.full_model),
@@ -150,6 +169,8 @@ class EvalResult:
150
  AutoEvalColumn.likes.name: self.likes,
151
  AutoEvalColumn.params.name: self.num_params,
152
  AutoEvalColumn.still_on_hub.name: self.still_on_hub,
 
 
153
  }
154
 
155
  for task in Tasks:
 
11
 
12
  from src.display.formatting import make_clickable_model
13
  from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
14
+ from src.submission.check_validity import is_model_on_hub, check_model_card
15
 
16
 
17
  @dataclass
 
32
  num_params: int = 0
33
  date: str = "" # submission date of request file
34
  still_on_hub: bool = False
35
+ is_merge: bool = False
36
+ flagged: bool = False
37
 
38
  @classmethod
39
  def init_from_json_file(self, json_filepath):
 
61
  result_key = f"{org}_{model}_{precision.value.name}"
62
  full_model = "/".join(org_and_model)
63
 
 
 
 
 
 
64
  still_on_hub, error, model_config = is_model_on_hub(
65
  full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
66
  )
 
70
  if architectures:
71
  architecture = ";".join(architectures)
72
 
73
+ # If the model card check fails (e.g. missing model card or license), we treat the model as deleted
74
+ if still_on_hub:
75
+ try:
76
+ if check_model_card(full_model)[0] is False:
77
+ still_on_hub = False
78
+ except Exception:
79
+ still_on_hub = False
80
+
81
+ # Check if the model is a merge
82
+ is_merge_from_metadata = False
83
+ flagged = False
84
+ if still_on_hub:
85
+ model_card = ModelCard.load(full_model)
86
+
87
+ if model_card.data.tags:
88
+ is_merge_from_metadata = "merge" in model_card.data.tags
89
+ merge_keywords = ["mergekit", "merged model", "merge model"]
90
+ # If the model card text describes a merge but the metadata tags do not declare it, we flag the model
91
+ is_merge_from_model_card = any(keyword in model_card.text.lower() for keyword in merge_keywords)
92
+ flagged = is_merge_from_model_card and not is_merge_from_metadata
93
+
94
+
95
  # Extract results available in this file (some results are split in several files)
96
  results = {}
97
  for task in Tasks:
 
130
  revision= config.get("model_sha", ""),
131
  still_on_hub=still_on_hub,
132
  architecture=architecture,
133
+ is_merge=is_merge_from_metadata,
134
+ flagged=flagged,
135
  )
136
 
137
  def update_with_request_file(self, requests_path):
 
157
  "eval_name": self.eval_name, # not a column, just a save name,
158
  AutoEvalColumn.precision.name: self.precision.value.name,
159
  AutoEvalColumn.model_type.name: self.model_type.value.name,
160
+ AutoEvalColumn.merged.name: self.is_merge,
161
+ AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol, # + "🥦" if self.is_merge,
162
  AutoEvalColumn.weight_type.name: self.weight_type.value.name,
163
  AutoEvalColumn.architecture.name: self.architecture,
164
  AutoEvalColumn.model.name: make_clickable_model(self.full_model),
 
169
  AutoEvalColumn.likes.name: self.likes,
170
  AutoEvalColumn.params.name: self.num_params,
171
  AutoEvalColumn.still_on_hub.name: self.still_on_hub,
172
+ AutoEvalColumn.flagged.name: self.flagged
173
+
174
  }
175
 
176
  for task in Tasks:
src/submission/check_validity.py CHANGED
@@ -8,7 +8,6 @@ import huggingface_hub
8
  from huggingface_hub import ModelCard
9
  from huggingface_hub.hf_api import ModelInfo
10
  from transformers import AutoConfig, AutoTokenizer
11
- from transformers.models.auto.tokenization_auto import tokenizer_class_from_name, get_tokenizer_config
12
 
13
  from src.envs import HAS_HIGHER_RATE_LIMIT
14
 
@@ -39,7 +38,7 @@ def check_model_card(repo_id: str) -> tuple[bool, str]:
39
 
40
  def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]:
41
  try:
42
- config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
43
  if test_tokenizer:
44
  try:
45
  tk = AutoTokenizer.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
 
8
  from huggingface_hub import ModelCard
9
  from huggingface_hub.hf_api import ModelInfo
10
  from transformers import AutoConfig, AutoTokenizer
 
11
 
12
  from src.envs import HAS_HIGHER_RATE_LIMIT
13
 
 
38
 
39
  def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]:
40
  try:
41
+ config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token) #, force_download=True)
42
  if test_tokenizer:
43
  try:
44
  tk = AutoTokenizer.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)