open_llm_leaderboard2

Runtime error

App Files Files Community

Clémentine committed on Jan 9

Commit

0c7ef71

•

1 Parent(s): 9d02a6b

wip

Browse files

Files changed (8) hide show

app.py +42 -23
src/envs.py +3 -0
src/leaderboard/read_evals.py +19 -43
src/populate.py +2 -2
{scripts → src/scripts}/create_request_file.py +5 -20
src/scripts/update_all_request_files.py +97 -0
src/submission/check_validity.py +13 -7
src/submission/submit.py +52 -3

app.py CHANGED Viewed

@@ -27,7 +27,7 @@ from src.display.utils import (
     WeightType,
     Precision
 )
-from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, H4_TOKEN, IS_PUBLIC, QUEUE_REPO, REPO_ID, RESULTS_REPO
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
 from src.submission.submit import add_new_eval
 from src.tools.collections import update_collections
@@ -43,33 +43,52 @@ enable_space_ci()
 def restart_space():
     API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
-try:
-    print(EVAL_REQUESTS_PATH)
-    snapshot_download(
-        repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
-    )
-except Exception:
-    restart_space()
-try:
-    print(EVAL_RESULTS_PATH)
-    snapshot_download(
-        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
     )
-except Exception:
-    restart_space()
-raw_data, original_df = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
-update_collections(original_df.copy())
-leaderboard_df = original_df.copy()
-plot_df = create_plot_df(create_scores_df(raw_data))
-(
-    finished_eval_queue_df,
-    running_eval_queue_df,
-    pending_eval_queue_df,
-) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
 # Searching and filtering

     WeightType,
     Precision
 )
+from src.envs import API, EVAL_REQUESTS_PATH, DYNAMIC_INFO_REPO, DYNAMIC_INFO_FILE_PATH, DYNAMIC_INFO_PATH, EVAL_RESULTS_PATH, H4_TOKEN, IS_PUBLIC, QUEUE_REPO, REPO_ID, RESULTS_REPO
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
 from src.submission.submit import add_new_eval
 from src.tools.collections import update_collections
 def restart_space():
     # Ask the Hub API client to restart the Space REPO_ID, authenticating with H4_TOKEN.
     API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
+def init_space():
+    """Mirror the three Hub datasets locally, then build every dataframe the app displays.
+    The request queue, the dynamic model information and the results are downloaded in
+    that order; a failed download triggers `restart_space()`.
+    Returns:
+        A 6-tuple: (leaderboard_df, original_df, plot_df, finished_eval_queue_df,
+        running_eval_queue_df, pending_eval_queue_df).
+    """
+    # (dataset repo, local folder) pairs, fetched in this exact order.
+    dataset_mirrors = (
+        (QUEUE_REPO, EVAL_REQUESTS_PATH),
+        (DYNAMIC_INFO_REPO, DYNAMIC_INFO_PATH),
+        (RESULTS_REPO, EVAL_RESULTS_PATH),
+    )
+    for dataset_repo, local_folder in dataset_mirrors:
+        try:
+            print(local_folder)
+            snapshot_download(
+                repo_id=dataset_repo,
+                local_dir=local_folder,
+                repo_type="dataset",
+                tqdm_class=None,
+                etag_timeout=30,
+            )
+        except Exception:
+            restart_space()
+    leaderboard_inputs = dict(
+        results_path=EVAL_RESULTS_PATH,
+        requests_path=EVAL_REQUESTS_PATH,
+        dynamic_path=DYNAMIC_INFO_FILE_PATH,
+        cols=COLS,
+        benchmark_cols=BENCHMARK_COLS,
+    )
+    raw_data, original_df = get_leaderboard_df(**leaderboard_inputs)
+    # The collections updater and the displayed leaderboard each get their own copy.
+    update_collections(original_df.copy())
+    leaderboard_df = original_df.copy()
+    scores_df = create_scores_df(raw_data)
+    plot_df = create_plot_df(scores_df)
+    finished_queue, running_queue, pending_queue = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
+    return leaderboard_df, original_df, plot_df, finished_queue, running_queue, pending_queue
+leaderboard_df, original_df, plot_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = init_space()
 # Searching and filtering

src/envs.py CHANGED Viewed

@@ -7,6 +7,7 @@ H4_TOKEN = os.environ.get("H4_TOKEN", None)
 REPO_ID = "HuggingFaceH4/open_llm_leaderboard"
 QUEUE_REPO = "open-llm-leaderboard/requests"
 RESULTS_REPO = "open-llm-leaderboard/results"
 PRIVATE_QUEUE_REPO = "open-llm-leaderboard/private-requests"
@@ -18,6 +19,8 @@ CACHE_PATH=os.getenv("HF_HOME", ".")
 EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
 EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
 EVAL_REQUESTS_PATH_PRIVATE = "eval-queue-private"
 EVAL_RESULTS_PATH_PRIVATE = "eval-results-private"

 REPO_ID = "HuggingFaceH4/open_llm_leaderboard"
 QUEUE_REPO = "open-llm-leaderboard/requests"
+DYNAMIC_INFO_REPO = "open-llm-leaderboard/dynamic_model_information"
 RESULTS_REPO = "open-llm-leaderboard/results"
 PRIVATE_QUEUE_REPO = "open-llm-leaderboard/private-requests"
 EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
 EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
+DYNAMIC_INFO_PATH = os.path.join(CACHE_PATH, "dynamic-info")
+DYNAMIC_INFO_FILE_PATH = os.path.join(DYNAMIC_INFO_PATH, "model_infos.json")
 EVAL_REQUESTS_PATH_PRIVATE = "eval-queue-private"
 EVAL_RESULTS_PATH_PRIVATE = "eval-results-private"

src/leaderboard/read_evals.py CHANGED Viewed

@@ -11,7 +11,6 @@ from huggingface_hub import ModelCard
 from src.display.formatting import make_clickable_model
 from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
-from src.submission.check_validity import is_model_on_hub, check_model_card
 @dataclass
@@ -34,6 +33,7 @@ class EvalResult:
     still_on_hub: bool = False
     is_merge: bool = False
     flagged: bool = False
     @classmethod
     def init_from_json_file(self, json_filepath):
@@ -42,13 +42,13 @@ class EvalResult:
             data = json.load(fp)
         # We manage the legacy config format
-        config = data.get("config", data.get("config_general", None))
         # Precision
         precision = Precision.from_str(config.get("model_dtype"))
         # Get model and org
-        org_and_model = config.get("model_name", config.get("model_args", None))
         org_and_model = org_and_model.split("/", 1)
         if len(org_and_model) == 1:
@@ -61,37 +61,6 @@ class EvalResult:
             result_key = f"{org}_{model}_{precision.value.name}"
         full_model = "/".join(org_and_model)
-        still_on_hub, error, model_config = is_model_on_hub(
-            full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
-        )
-        architecture = "?"
-        if model_config is not None:
-            architectures = getattr(model_config, "architectures", None)
-            if architectures:
-                architecture = ";".join(architectures)
-        # If the model doesn't have a model card or a license, we consider it's deleted
-        if still_on_hub:
-            try:
-                if check_model_card(full_model)[0] is False:
-                    still_on_hub = False
-            except Exception:
-                still_on_hub = False
-        # Check if the model is a merge
-        is_merge_from_metadata = False
-        flagged = False
-        if still_on_hub:
-            model_card = ModelCard.load(full_model)
-            if model_card.data.tags:
-                is_merge_from_metadata = "merge" in model_card.data.tags
-            merge_keywords = ["mergekit", "merged model", "merge model", "merging"]
-            # If the model is a merge but not saying it in the metadata, we flag it
-            is_merge_from_model_card = any(keyword in model_card.text.lower() for keyword in merge_keywords)
-            flagged = is_merge_from_model_card and not is_merge_from_metadata
         # Extract results available in this file (some results are split in several files)
         results = {}
         for task in Tasks:
@@ -128,10 +97,6 @@ class EvalResult:
             results=results,
             precision=precision,
             revision= config.get("model_sha", ""),
-            still_on_hub=still_on_hub,
-            architecture=architecture,
-            is_merge=is_merge_from_metadata,
-            flagged=flagged,
         )
     def update_with_request_file(self, requests_path):
@@ -143,13 +108,21 @@ class EvalResult:
                 request = json.load(f)
             self.model_type = ModelType.from_str(request.get("model_type", ""))
             self.weight_type = WeightType[request.get("weight_type", "Original")]
-            self.license = request.get("license", "?")
-            self.likes = request.get("likes", 0)
             self.num_params = request.get("params", 0)
             self.date = request.get("submitted_time", "")
         except Exception:
             print(f"Could not find request file for {self.org}/{self.model}")
     def to_dict(self):
         """Converts the Eval Result to a dict compatible with our dataframe display"""
         average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
@@ -158,7 +131,7 @@ class EvalResult:
             AutoEvalColumn.precision.name: self.precision.value.name,
             AutoEvalColumn.model_type.name: self.model_type.value.name,
             AutoEvalColumn.merged.name: self.is_merge,
-            AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol, # + "🥦" if self.is_merge,
             AutoEvalColumn.weight_type.name: self.weight_type.value.name,
             AutoEvalColumn.architecture.name: self.architecture,
             AutoEvalColumn.model.name: make_clickable_model(self.full_model),
@@ -170,7 +143,6 @@ class EvalResult:
             AutoEvalColumn.params.name: self.num_params,
             AutoEvalColumn.still_on_hub.name: self.still_on_hub,
             AutoEvalColumn.flagged.name: self.flagged
         }
         for task in Tasks:
@@ -201,7 +173,7 @@ def get_request_file_for_model(requests_path, model_name, precision):
     return request_file
-def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
     """From the path of the results folder root, extract all needed info for results"""
     model_result_filepaths = []
@@ -219,11 +191,15 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
         for file in files:
             model_result_filepaths.append(os.path.join(root, file))
     eval_results = {}
     for model_result_filepath in model_result_filepaths:
         # Creation of result
         eval_result = EvalResult.init_from_json_file(model_result_filepath)
         eval_result.update_with_request_file(requests_path)
         # Store results of same eval together
         eval_name = eval_result.eval_name

 from src.display.formatting import make_clickable_model
 from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
 @dataclass
     still_on_hub: bool = False
     is_merge: bool = False
     flagged: bool = False
+    tags: list = None
     @classmethod
     def init_from_json_file(self, json_filepath):
             data = json.load(fp)
         # We manage the legacy config format
+        config = data.get("config_general")
         # Precision
         precision = Precision.from_str(config.get("model_dtype"))
         # Get model and org
+        org_and_model = config.get("model_name")
         org_and_model = org_and_model.split("/", 1)
         if len(org_and_model) == 1:
             result_key = f"{org}_{model}_{precision.value.name}"
         full_model = "/".join(org_and_model)
         # Extract results available in this file (some results are split in several files)
         results = {}
         for task in Tasks:
             results=results,
             precision=precision,
             revision= config.get("model_sha", ""),
         )
     def update_with_request_file(self, requests_path):
                 request = json.load(f)
             self.model_type = ModelType.from_str(request.get("model_type", ""))
             self.weight_type = WeightType[request.get("weight_type", "Original")]
             self.num_params = request.get("params", 0)
             self.date = request.get("submitted_time", "")
+            self.architecture = request["architectures"]
         except Exception:
             print(f"Could not find request file for {self.org}/{self.model}")
+    def update_with_dynamic_file_dict(self, file_dict):
+        """Copy one model's entry of the dynamic-info file onto this result.
+        Args:
+            file_dict: dict for a single model; the keys read are "license", "likes",
+                "still_on_hub" and "tags". Every key is optional.
+        Sets `license`, `likes`, `still_on_hub`, `flagged`, `is_merge` and `tags`.
+        """
+        # A missing or null "tags" value means "no tags" instead of aborting the whole load.
+        tags = file_dict.get("tags") or []
+        self.license = file_dict.get("license", "?")
+        self.likes = file_dict.get("likes", 0)
+        # Same fallback as the dataclass default for `still_on_hub`.
+        self.still_on_hub = file_dict.get("still_on_hub", False)
+        # Any tag containing "flagged" (e.g. "flagged:undisclosed_merge") flags the model.
+        self.flagged = any("flagged" in tag for tag in tags)
+        self.is_merge = "merge" in tags
+        self.tags = tags
     def to_dict(self):
         """Converts the Eval Result to a dict compatible with our dataframe display"""
         average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
             AutoEvalColumn.precision.name: self.precision.value.name,
             AutoEvalColumn.model_type.name: self.model_type.value.name,
             AutoEvalColumn.merged.name: self.is_merge,
+            AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
             AutoEvalColumn.weight_type.name: self.weight_type.value.name,
             AutoEvalColumn.architecture.name: self.architecture,
             AutoEvalColumn.model.name: make_clickable_model(self.full_model),
             AutoEvalColumn.params.name: self.num_params,
             AutoEvalColumn.still_on_hub.name: self.still_on_hub,
             AutoEvalColumn.flagged.name: self.flagged
         }
         for task in Tasks:
     return request_file
+def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: str) -> list[EvalResult]:
     """From the path of the results folder root, extract all needed info for results"""
     model_result_filepaths = []
         for file in files:
             model_result_filepaths.append(os.path.join(root, file))
+    with open(dynamic_path) as f:
+        dynamic_data = json.load(f)
     eval_results = {}
     for model_result_filepath in model_result_filepaths:
         # Creation of result
         eval_result = EvalResult.init_from_json_file(model_result_filepath)
         eval_result.update_with_request_file(requests_path)
+        eval_result.update_with_dynamic_file_dict(dynamic_data[eval_result.full_model])
         # Store results of same eval together
         eval_name = eval_result.eval_name

src/populate.py CHANGED Viewed

@@ -9,8 +9,8 @@ from src.leaderboard.filter_models import filter_models
 from src.leaderboard.read_evals import get_raw_eval_results
-def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
-    raw_data = get_raw_eval_results(results_path, requests_path)
     all_data_json = [v.to_dict() for v in raw_data]
     all_data_json.append(baseline_row)
     filter_models(all_data_json)

 from src.leaderboard.read_evals import get_raw_eval_results
+def get_leaderboard_df(results_path: str, requests_path: str, dynamic_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
+    raw_data = get_raw_eval_results(results_path=results_path, requests_path=requests_path, dynamic_path=dynamic_path)
     all_data_json = [v.to_dict() for v in raw_data]
     all_data_json.append(baseline_row)
     filter_models(all_data_json)

{scripts → src/scripts}/create_request_file.py RENAMED Viewed

@@ -1,36 +1,21 @@
 import json
 import os
 import pprint
-import re
 from datetime import datetime, timezone
 import click
 from colorama import Fore
 from huggingface_hub import HfApi, snapshot_download
 EVAL_REQUESTS_PATH = "eval-queue"
 QUEUE_REPO = "open-llm-leaderboard/requests"
 precisions = ("float16", "bfloat16", "8bit (LLM.int8)", "4bit (QLoRA / FP4)", "GPTQ")
-model_types = ("pretrained", "fine-tuned", "RL-tuned", "instruction-tuned")
-weight_types = ("Original", "Delta", "Adapter")
-def get_model_size(model_info, precision: str):
-    size_pattern =  re.compile(r"(\d+\.)?\d+(b|m)")
-    try:
-        model_size = round(model_info.safetensors["total"] / 1e9, 3)
-    except (AttributeError, TypeError):
-        try:
-            size_match = re.search(size_pattern, model_info.modelId.lower())
-            model_size = size_match.group(0)
-            model_size = round(float(model_size[:-1]) if model_size[-1] == "b" else float(model_size[:-1]) / 1e3, 3)
-        except AttributeError:
-            return 0  # Unknown model sizes are indicated as 0, see NUMERIC_INTERVALS in app.py
-    size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.modelId.lower()) else 1
-    model_size = size_factor * model_size
-    return model_size
 def main():

 import json
 import os
 import pprint
 from datetime import datetime, timezone
 import click
 from colorama import Fore
 from huggingface_hub import HfApi, snapshot_download
+from src.submission.check_validity import get_model_size
+from src.display.utils import ModelType, WeightType
 EVAL_REQUESTS_PATH = "eval-queue"
 QUEUE_REPO = "open-llm-leaderboard/requests"
 precisions = ("float16", "bfloat16", "8bit (LLM.int8)", "4bit (QLoRA / FP4)", "GPTQ")
+model_types = [e.name for e in ModelType]
+weight_types = [e.name for e in WeightType]
 def main():

src/scripts/update_all_request_files.py ADDED Viewed

	@@ -0,0 +1,97 @@

+from huggingface_hub import HfApi, ModelFilter, snapshot_download
+from huggingface_hub import ModelCard
+import json
+import os
+import time
+import shutil
+from src.submission.check_validity import is_model_on_hub, check_model_card, get_model_size
+from src.envs import DYNAMIC_INFO_REPO, DYNAMIC_INFO_FILE_PATH, API
+HF_TOKEN = os.environ.get("HF_TOKEN", None)
 # NOTE(review): HF_TOKEN is not referenced anywhere else in the visible part of this script.
 # Local folder that receives the snapshot below.
+TMP_FOLDER = "tmp_requests"
 # Download the dynamic-info dataset repo into TMP_FOLDER.
 # NOTE(review): the rest of the script reads and uploads DYNAMIC_INFO_FILE_PATH, not a path
 # under TMP_FOLDER - confirm that file is populated before this script runs.
+snapshot_download(
+    repo_id=DYNAMIC_INFO_REPO, local_dir=TMP_FOLDER, repo_type="dataset", tqdm_class=None, etag_timeout=30
+)
+# Get models
+start = time.time()
 # The listing is filtered to the "text-generation" task and materialised as a list.
+models = list(API.list_models(
+    filter=ModelFilter(task="text-generation"),
+    full=False,
+    cardData=True,
+    fetch_config=True,
+))
+print(f"Downloaded list of models in {time.time() - start:.2f} seconds")
+def update_models(file_path, models):
+    """
+    Refresh the Hub-derived fields of every model stored in the JSON file `file_path`.
+    The file maps model ids to dicts. For each id that is also present in `models`, the
+    entry's `likes`, `license`, `still_on_hub` and `tags` keys are recomputed, then the
+    file is rewritten in place. Entries whose id is not in `models` are left untouched.
+    Args:
+        file_path: path of the JSON file to update.
+        models: Hub model infos, either a dict keyed by model id or an iterable of
+            objects exposing `.id` (such as the list built from `API.list_models`).
+    Returns:
+        The list of model ids whose entry was refreshed.
+    """
+    # The script passes a plain list of model infos: index it by id once so that the
+    # membership test and the lookup below actually work (and are O(1)).
+    if not isinstance(models, dict):
+        models = {model_info.id: model_info for model_info in models}
+    # Read everything first so the file is not held open during the network calls.
+    with open(file_path, "r") as f:
+        model_infos = json.load(f)
+    merge_keywords = ["mergekit", "merged model", "merge model", "merging"]
+    updated_ids = []
+    for model_id, data in model_infos.items():
+        model_cfg = models.get(model_id)
+        if model_cfg is None:
+            continue
+        data['likes'] = model_cfg.likes
+        data['license'] = model_cfg.card_data.license if model_cfg.card_data is not None else ""
+        # Is the model still on the hub
+        still_on_hub, _, _ = is_model_on_hub(
+            model_name=model_id, revision=data.get("revision"), trust_remote_code=True, test_tokenizer=False
+        )
+        # If the model doesn't have a model card or a license, we consider it's deleted
+        if still_on_hub:
+            try:
+                if check_model_card(model_id)[0] is False:
+                    still_on_hub = False
+            except Exception:
+                still_on_hub = False
+        data['still_on_hub'] = still_on_hub
+        if still_on_hub:
+            model_card = ModelCard.load(model_id)
+            # A card without a `tags` entry yields None; treat it as "no tags".
+            card_tags = model_card.data.tags or []
+            is_merge_from_metadata = "merge" in card_tags
+            is_merge_from_model_card = any(keyword in model_card.text.lower() for keyword in merge_keywords)
+            tags = []
+            # A merge declared in the metadata OR detected in the card text is a merge.
+            if is_merge_from_metadata or is_merge_from_model_card:
+                tags.append("merge")
+            # If the model is a merge but not saying it in the metadata, we flag it
+            if is_merge_from_model_card and not is_merge_from_metadata:
+                tags.append("flagged:undisclosed_merge")
+            if "moe" in card_tags:
+                tags.append("moe")
+            data["tags"] = tags
+        else:
+            # The card cannot be read any more: keep the previously stored tags instead of
+            # reusing another model's tags (or failing on an unbound name).
+            data["tags"] = data.get("tags") or []
+        updated_ids.append(model_id)
+    with open(file_path, 'w') as f:
+        json.dump(model_infos, f, indent=2)
+    return updated_ids
+# Refresh the dynamic-info file, then push it back to the dataset repo.
+start = time.time()
+# `update_models` may not return anything; report zero ids instead of failing on len(None).
+updated_ids = update_models(DYNAMIC_INFO_FILE_PATH, models) or []
+print(f"updated in {time.time() - start:.2f} seconds, updated ids: {len(updated_ids)}")
+API.upload_file(
+    path_or_fileobj=DYNAMIC_INFO_FILE_PATH,
+    # The path is built with os.path.join, so take its last component the same way.
+    path_in_repo=os.path.basename(DYNAMIC_INFO_FILE_PATH),
+    repo_id=DYNAMIC_INFO_REPO,
+    repo_type="dataset",
+    commit_message="Daily request file update.",
+)
+# Remove the temporary snapshot folder.
+shutil.rmtree(TMP_FOLDER)

src/submission/check_validity.py CHANGED Viewed

@@ -6,7 +6,7 @@ from datetime import datetime, timedelta, timezone
 import huggingface_hub
 from huggingface_hub import ModelCard
-from huggingface_hub.hf_api import ModelInfo
 from transformers import AutoConfig, AutoTokenizer
 from src.envs import HAS_HIGHER_RATE_LIMIT
@@ -36,7 +36,7 @@ def check_model_card(repo_id: str) -> tuple[bool, str]:
     return True, ""
-def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]:
     try:
         config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token) #, force_download=True)
         if test_tokenizer:
@@ -65,17 +65,23 @@ def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_rem
 def get_model_size(model_info: ModelInfo, precision: str):
     size_pattern = re.compile(r"(\d+\.)?\d+(b|m)")
     try:
-        model_size = round(model_info.safetensors["total"] / 1e9, 3)
-    except (AttributeError, TypeError ):
         try:
-            size_match = re.search(size_pattern, model_info.modelId.lower())
             model_size = size_match.group(0)
             model_size = round(float(model_size[:-1]) if model_size[-1] == "b" else float(model_size[:-1]) / 1e3, 3)
-        except AttributeError:
             return 0  # Unknown model sizes are indicated as 0, see NUMERIC_INTERVALS in app.py
-    size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.modelId.lower()) else 1
     model_size = size_factor * model_size
     return model_size

 import huggingface_hub
 from huggingface_hub import ModelCard
+from huggingface_hub.hf_api import ModelInfo, get_safetensors_metadata
 from transformers import AutoConfig, AutoTokenizer
 from src.envs import HAS_HIGHER_RATE_LIMIT
     return True, ""
+def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str, AutoConfig]:
     try:
         config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token) #, force_download=True)
         if test_tokenizer:
 def get_model_size(model_info: ModelInfo, precision: str):
     # Estimate the model size in billions of parameters.
     # The safetensors metadata is tried first; otherwise the size is parsed from the model id.
     # Returns 0 when the id-parsing fallback raises AttributeError (no size found).
     # Matches an optionally fractional number followed by "b" or "m" (matched against the lowercased id).
     size_pattern = re.compile(r"(\d+\.)?\d+(b|m)")
+    safetensors = None
     try:
+        safetensors = get_safetensors_metadata(model_info.id)
+    except Exception as e:
         # Best effort: any failure is printed and the id-based fallback below is used.
+        print(e)
+    if safetensors is not None:
         # Sum of all reported parameter counts, expressed in billions.
+        model_size = round(sum(safetensors.parameter_count.values()) / 1e9, 3)
+    else:
         try:
+            size_match = re.search(size_pattern, model_info.id.lower())
             # `size_match` is None when nothing matched; `.group` then raises AttributeError.
             model_size = size_match.group(0)
             # A "b" suffix is kept as is; an "m" suffix is divided by 1e3.
             model_size = round(float(model_size[:-1]) if model_size[-1] == "b" else float(model_size[:-1]) / 1e3, 3)
+        except AttributeError as e:
             return 0  # Unknown model sizes are indicated as 0, see NUMERIC_INTERVALS in app.py
     # The size is multiplied by 8 when the precision is "GPTQ" or the id contains "gptq".
+    size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.id.lower()) else 1
     model_size = size_factor * model_size
     return model_size

src/submission/submit.py CHANGED Viewed

@@ -2,8 +2,10 @@ import json
 import os
 from datetime import datetime, timezone
 from src.display.formatting import styled_error, styled_message, styled_warning
-from src.envs import API, EVAL_REQUESTS_PATH, H4_TOKEN, QUEUE_REPO, RATE_LIMIT_PERIOD, RATE_LIMIT_QUOTA
 from src.leaderboard.filter_models import DO_NOT_SUBMIT_MODELS
 from src.submission.check_validity import (
     already_submitted_models,
@@ -65,9 +67,15 @@ def add_new_eval(
             return styled_error(f'Base model "{base_model}" {error}')
     if not weight_type == "Adapter":
-        model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, test_tokenizer=True)
         if not model_on_hub:
             return styled_error(f'Model "{model}" {error}')
     # Is the model info correctly filled?
     try:
@@ -86,6 +94,22 @@ def add_new_eval(
     modelcard_OK, error_msg = check_model_card(model)
     if not modelcard_OK:
         return styled_error(error_msg)
     # Seems good, creating the eval
     print("Adding new eval")
@@ -96,13 +120,21 @@ def add_new_eval(
         "revision": revision,
         "private": private,
         "precision": precision,
         "weight_type": weight_type,
         "status": "PENDING",
         "submitted_time": current_time,
         "model_type": model_type,
         "likes": model_info.likes,
-        "params": model_size,
         "license": license,
     }
     # Check for duplicate submission
@@ -126,6 +158,23 @@ def add_new_eval(
         commit_message=f"Add {model} to eval queue",
     )
     # Remove the local file
     os.remove(out_path)

 import os
 from datetime import datetime, timezone
+from huggingface_hub import ModelCard
 from src.display.formatting import styled_error, styled_message, styled_warning
+from src.envs import API, EVAL_REQUESTS_PATH, DYNAMIC_INFO_PATH, DYNAMIC_INFO_FILE_PATH, DYNAMIC_INFO_REPO, H4_TOKEN, QUEUE_REPO, RATE_LIMIT_PERIOD, RATE_LIMIT_QUOTA
 from src.leaderboard.filter_models import DO_NOT_SUBMIT_MODELS
 from src.submission.check_validity import (
     already_submitted_models,
             return styled_error(f'Base model "{base_model}" {error}')
     if not weight_type == "Adapter":
+        model_on_hub, error, model_config = is_model_on_hub(model_name=model, revision=revision, test_tokenizer=True)
         if not model_on_hub:
             return styled_error(f'Model "{model}" {error}')
+        architecture = "?"
+        if model_config is not None:
+            architectures = getattr(model_config, "architectures", None)
+            if architectures:
+                architecture = ";".join(architectures)
     # Is the model info correctly filled?
     try:
     modelcard_OK, error_msg = check_model_card(model)
     if not modelcard_OK:
         return styled_error(error_msg)
+    # Storing the model tags
+    tags = []
+    model_card = ModelCard.load(model)
+    is_merge_from_metadata = "merge" in model_card.data.tags if model_card.data.tags else False
+    merge_keywords = ["mergekit", "merged model", "merge model", "merging"]
+    # If the model is a merge but not saying it in the metadata, we flag it
+    is_merge_from_model_card = any(keyword in model_card.text.lower() for keyword in merge_keywords)
+    if is_merge_from_model_card:
+        tags.append("merge")
+        if not is_merge_from_metadata:
+            tags.append("flagged:undisclosed_merge")
+    if "moe" in model_card.data.tags:
+        tags.append("moe")
     # Seems good, creating the eval
     print("Adding new eval")
         "revision": revision,
         "private": private,
         "precision": precision,
+        "params": model_size,
+        "architectures": architecture,
         "weight_type": weight_type,
         "status": "PENDING",
         "submitted_time": current_time,
         "model_type": model_type,
+        "job_id": -1,
+        "job_start_time": None,
+    }
+    supplementary_info = {
         "likes": model_info.likes,
         "license": license,
+        "still_on_hub": True,
+        "tags": tags,
     }
     # Check for duplicate submission
         commit_message=f"Add {model} to eval queue",
     )
+    with open(DYNAMIC_INFO_FILE_PATH) as f:
+        all_supplementary_info = json.load(f)
+    all_supplementary_info[model] = supplementary_info
+    with open(DYNAMIC_INFO_FILE_PATH, "w") as f:
+        json.dump(all_supplementary_info, f, indent=2)
+    API.upload_file(
+        path_or_fileobj=DYNAMIC_INFO_FILE_PATH,
+        path_in_repo=DYNAMIC_INFO_FILE_PATH.split("/")[-1],
+        repo_id=DYNAMIC_INFO_REPO,
+        repo_type="dataset",
+        commit_message=f"Add {model} to dynamic info queue",
+    )
     # Remove the local file
     os.remove(out_path)