open_pt_llm_leaderboard

Restarting on CPU Upgrade

App Files Files Community

eduagarcia committed on Jan 23

Commit

9839977

•

1 Parent(s): 359d8a9

Merge Origin - Rename model types (#1)

Browse files

- testing hiding the CI for the updater (47aab9dee9b5199b9c0ac9afc01c6159c1541b60)
- testing the dynamic updater at 30, without the CI (0b5382088e122d5c0f9380a3b74103659027e348)
- flag models (4b67a330d44b119fdf5ef65bc435bc291092901e)
- better checkboxes, better filtering (f04f90eb834bf75d912827652781831866453e8c)
- relaxed filters on merged models (bcf0226a9a21925c68838637656f555a79e4de82)
- mini fix (c2cc6bf98a7a9ad3ef801aac4496de52b424881d)
- change model types available at submission time (05bda40b490bc20ff5fccd6c5e36445fcc06c652)
- merge and moerge update (193f184fa5f98a440e9ab6d8750f10fa2b17e887)
- update for adapters on the hub (5c07fb7121ea75f208e42fb69bc850b72d95a6ba)
- Merge branch 'main' of https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard into merge_origin (af9a67984183f7f91cc055b27ac86c6fb31906fb)

Files changed (9) hide show

app.py +21 -42
src/display/about.py +2 -3
src/display/utils.py +7 -7
src/leaderboard/filter_models.py +8 -1
src/populate.py +2 -2
src/scripts/update_all_request_files.py +11 -34
src/submission/check_validity.py +35 -4
src/submission/submit.py +3 -30
update_dynamic.py +4 -0

app.py CHANGED Viewed

@@ -42,7 +42,7 @@ from src.tools.plots import (
 )
 # Start ephemeral Spaces on PRs (see config in README.md)
-enable_space_ci()
 def restart_space():
     API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
@@ -106,13 +106,10 @@ def update_table(
     type_query: list,
     precision_query: str,
     size_query: list,
-    show_deleted: bool,
-    show_merges: bool,
-    show_moe: bool,
-    show_flagged: bool,
     query: str,
 ):
-    filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, show_deleted, show_merges, show_moe, show_flagged)
     filtered_df = filter_queries(query, filtered_df)
     df = select_columns(filtered_df, columns)
     return df
@@ -160,21 +157,21 @@ def filter_queries(query: str, filtered_df: pd.DataFrame):
 def filter_models(
-    df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, show_deleted: bool, show_merges: bool, show_moe:bool, show_flagged: bool
 ) -> pd.DataFrame:
     # Show all models
-    if show_deleted:
-        filtered_df = df
-    else:  # Show only still on the hub models
         filtered_df = df[df[AutoEvalColumn.still_on_hub.name] == True]
-    if not show_merges:
         filtered_df = filtered_df[filtered_df[AutoEvalColumn.merged.name] == False]
-    if not show_moe:
         filtered_df = filtered_df[filtered_df[AutoEvalColumn.moe.name] == False]
-    if not show_flagged:
         filtered_df = filtered_df[filtered_df[AutoEvalColumn.flagged.name] == False]
     type_emoji = [t[0] for t in type_query]
@@ -193,10 +190,7 @@ leaderboard_df = filter_models(
     type_query=[t.to_str(" : ") for t in ModelType],
     size_query=list(NUMERIC_INTERVALS.keys()),
     precision_query=[i.value.name for i in Precision],
-    show_deleted=True,
-    show_merges=False,
-    show_moe=True,
-    show_flagged=False
 )
 demo = gr.Blocks(css=custom_css)
@@ -231,17 +225,11 @@ with demo:
                             interactive=True,
                         )
                     with gr.Row():
-                        deleted_models_visibility = gr.Checkbox(
-                            value=True, label="Show private/deleted models", interactive=True
-                        )
-                        merged_models_visibility = gr.Checkbox(
-                            value=False, label="Show merges", interactive=True
-                        )
-                        moe_models_visibility = gr.Checkbox(
-                            value=True, label="Show MoE", interactive=True
-                        )
-                        flagged_models_visibility = gr.Checkbox(
-                            value=False, label="Show flagged models", interactive=True
                         )
                 with gr.Column(min_width=320):
                     #with gr.Box(elem_id="box-filter"):
@@ -296,10 +284,7 @@ with demo:
                     filter_columns_type,
                     filter_columns_precision,
                     filter_columns_size,
-                    deleted_models_visibility,
-                    merged_models_visibility,
-                    moe_models_visibility,
-                    flagged_models_visibility,
                     search_bar,
                 ],
                 leaderboard_table,
@@ -315,10 +300,7 @@ with demo:
                     filter_columns_type,
                     filter_columns_precision,
                     filter_columns_size,
-                    deleted_models_visibility,
-                    merged_models_visibility,
-                    moe_models_visibility,
-                    flagged_models_visibility,
                     search_bar,
                 ],
                 leaderboard_table,
@@ -326,7 +308,7 @@ with demo:
             # Check query parameter once at startup and update search bar + hidden component
             demo.load(load_query, inputs=[], outputs=[search_bar, hidden_search_bar])
-            for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size, deleted_models_visibility, merged_models_visibility, moe_models_visibility, flagged_models_visibility]:
                 selector.change(
                     update_table,
                     [
@@ -335,10 +317,7 @@ with demo:
                         filter_columns_type,
                         filter_columns_precision,
                         filter_columns_size,
-                        deleted_models_visibility,
-                        merged_models_visibility,
-                        moe_models_visibility,
-                        flagged_models_visibility,
                         search_bar,
                     ],
                     leaderboard_table,
@@ -487,7 +466,7 @@ with demo:
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=10800) # restarted every 3h
-scheduler.add_job(update_dynamic_files, "cron", minute=00) # launched every hour on the hour
 scheduler.start()
 demo.queue(default_concurrency_limit=40).launch()

 )
 # Start ephemeral Spaces on PRs (see config in README.md)
+#enable_space_ci()
 def restart_space():
     API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
     type_query: list,
     precision_query: str,
     size_query: list,
+    hide_models: list,
     query: str,
 ):
+    filtered_df = filter_models(df=hidden_df, type_query=type_query, size_query=size_query, precision_query=precision_query, hide_models=hide_models)
     filtered_df = filter_queries(query, filtered_df)
     df = select_columns(filtered_df, columns)
     return df
 def filter_models(
+    df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, hide_models: list
 ) -> pd.DataFrame:
     # Hide private/deleted models when requested; otherwise keep all models
+    if "Private or deleted" in hide_models:
         filtered_df = df[df[AutoEvalColumn.still_on_hub.name] == True]
+    else:
+        filtered_df = df
+    if "Contains a merge/moerge" in hide_models:
         filtered_df = filtered_df[filtered_df[AutoEvalColumn.merged.name] == False]
+    if "MoE" in hide_models:
         filtered_df = filtered_df[filtered_df[AutoEvalColumn.moe.name] == False]
+    if "Flagged" in hide_models:
         filtered_df = filtered_df[filtered_df[AutoEvalColumn.flagged.name] == False]
     type_emoji = [t[0] for t in type_query]
     type_query=[t.to_str(" : ") for t in ModelType],
     size_query=list(NUMERIC_INTERVALS.keys()),
     precision_query=[i.value.name for i in Precision],
+    hide_models=["Contains a merge/moerge", "Flagged"], # available options: "Private or deleted", "Contains a merge/moerge", "Flagged", "MoE"
 )
 demo = gr.Blocks(css=custom_css)
                             interactive=True,
                         )
                     with gr.Row():
+                        hide_models = gr.CheckboxGroup(
+                            label="Hide models",
+                            choices = ["Private or deleted", "Contains a merge/moerge", "Flagged", "MoE"],
+                            value=["Private or deleted", "Contains a merge/moerge", "Flagged"],
+                            interactive=True
                         )
                 with gr.Column(min_width=320):
                     #with gr.Box(elem_id="box-filter"):
                     filter_columns_type,
                     filter_columns_precision,
                     filter_columns_size,
+                    hide_models,
                     search_bar,
                 ],
                 leaderboard_table,
                     filter_columns_type,
                     filter_columns_precision,
                     filter_columns_size,
+                    hide_models,
                     search_bar,
                 ],
                 leaderboard_table,
             # Check query parameter once at startup and update search bar + hidden component
             demo.load(load_query, inputs=[], outputs=[search_bar, hidden_search_bar])
+            for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size, hide_models]:
                 selector.change(
                     update_table,
                     [
                         filter_columns_type,
                         filter_columns_precision,
                         filter_columns_size,
+                        hide_models,
                         search_bar,
                     ],
                     leaderboard_table,
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=10800) # restarted every 3h
+scheduler.add_job(update_dynamic_files, "cron", minute=30) # launched every hour at half past the hour (minute 30)
 scheduler.start()
 demo.queue(default_concurrency_limit=40).launch()

src/display/about.py CHANGED Viewed

@@ -64,9 +64,8 @@ Side note on the baseline scores:
 ## Icons
 - {ModelType.PT.to_str(" : ")} model: new, base models, trained on a given corpora
 - {ModelType.FT.to_str(" : ")} model: pretrained models finetuned on more data
-Specific fine-tune subcategories (more adapted to chat):
-- {ModelType.IFT.to_str(" : ")} model: instruction fine-tunes, which are model fine-tuned specifically on datasets of task instruction
-- {ModelType.RL.to_str(" : ")} model: reinforcement fine-tunes, which usually change the model loss a bit with an added policy.
 If there is no icon, we have not uploaded the information on the model yet, feel free to open an issue with the model information!
 "Flagged" indicates that this model has been flagged by the community, and should probably be ignored! Clicking the link will redirect you to the discussion about the model.

 ## Icons
 - {ModelType.PT.to_str(" : ")} model: new, base models, trained on a given corpora
 - {ModelType.FT.to_str(" : ")} model: pretrained models finetuned on more data
+- {ModelType.chat.to_str(" : ")} model: chat like fine-tunes, either using IFT (datasets of task instruction), RLHF or DPO (changing the model loss a bit with an added policy), etc
+- {ModelType.merges.to_str(" : ")} model: merges or MoErges, models which have been merged or fused without additional fine-tuning.
 If there is no icon, we have not uploaded the information on the model yet, feel free to open an issue with the model information!
 "Flagged" indicates that this model has been flagged by the community, and should probably be ignored! Clicking the link will redirect you to the discussion about the model.

src/display/utils.py CHANGED Viewed

@@ -210,9 +210,9 @@ class ModelDetails:
 class ModelType(Enum):
     PT = ModelDetails(name="pretrained", symbol="🟢")
-    FT = ModelDetails(name="fine-tuned", symbol="🔶")
-    IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
-    RL = ModelDetails(name="RL-tuned", symbol="🟦")
     Unknown = ModelDetails(name="", symbol="?")
     def to_str(self, separator=" "):
@@ -224,10 +224,10 @@ class ModelType(Enum):
             return ModelType.FT
         if "pretrained" in type or "🟢" in type:
             return ModelType.PT
-        if "RL-tuned" in type or "🟦" in type:
-            return ModelType.RL
-        if "instruction-tuned" in type or "⭕" in type:
-            return ModelType.IFT
         return ModelType.Unknown
 class WeightType(Enum):

 class ModelType(Enum):
     PT = ModelDetails(name="pretrained", symbol="🟢")
+    FT = ModelDetails(name="fine-tuned on domain-specific datasets", symbol="🔶")
+    chat = ModelDetails(name="chat models (RLHF, DPO, IFT, ...)", symbol="💬")
+    merges = ModelDetails(name="base merges and moerges", symbol="🤝")
     Unknown = ModelDetails(name="", symbol="?")
     def to_str(self, separator=" "):
             return ModelType.FT
         if "pretrained" in type or "🟢" in type:
             return ModelType.PT
+        if any([k in type for k in ["instruction-tuned", "RL-tuned", "chat", "🟦", "⭕", "💬"]]):
+            return ModelType.chat
+        if "merge" in type or "🤝" in type:
+            return ModelType.merges
         return ModelType.Unknown
 class WeightType(Enum):

src/leaderboard/filter_models.py CHANGED Viewed

@@ -43,6 +43,13 @@ FLAGGED_MODELS = {
     "dillfrescott/trinity-medium": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
     "udkai/Garrulus": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/526",
     "dfurman/GarrulusMarcoro-7B-v0.1": "https://huggingface.co/dfurman/GarrulusMarcoro-7B-v0.1/discussions/1",
     # Merges not indicated
     "gagan3012/MetaModelv2": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
     "gagan3012/MetaModelv3": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
@@ -126,6 +133,6 @@ def remove_forbidden_models(leaderboard_data: list[dict]):
     return leaderboard_data
-def filter_models(leaderboard_data: list[dict]):
     leaderboard_data = remove_forbidden_models(leaderboard_data)
     flag_models(leaderboard_data)

     "dillfrescott/trinity-medium": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
     "udkai/Garrulus": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/526",
     "dfurman/GarrulusMarcoro-7B-v0.1": "https://huggingface.co/dfurman/GarrulusMarcoro-7B-v0.1/discussions/1",
+    "udkai/Turdus": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/548",
+    "eren23/slerp-test-turdus-beagle": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/548",
+    "abideen/NexoNimbus-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/548",
+    "alnrg2arg/test2_3": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/548",
+    "nfaheem/Marcoroni-7b-DPO-Merge": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/548",
+    "CultriX/MergeTrix-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/548",
+    "liminerity/Blur-7b-v1.21": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/548",
     # Merges not indicated
     "gagan3012/MetaModelv2": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
     "gagan3012/MetaModelv3": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
     return leaderboard_data
+def filter_models_flags(leaderboard_data: list[dict]):
     leaderboard_data = remove_forbidden_models(leaderboard_data)
     flag_models(leaderboard_data)

src/populate.py CHANGED Viewed

@@ -5,7 +5,7 @@ import pandas as pd
 from src.display.formatting import has_no_nan_values, make_requests_clickable_model
 from src.display.utils import AutoEvalColumn, EvalQueueColumn, baseline_row
-from src.leaderboard.filter_models import filter_models
 from src.leaderboard.read_evals import get_raw_eval_results
@@ -13,7 +13,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, dynamic_path: str,
     raw_data = get_raw_eval_results(results_path=results_path, requests_path=requests_path, dynamic_path=dynamic_path)
     all_data_json = [v.to_dict() for v in raw_data]
     all_data_json.append(baseline_row)
-    filter_models(all_data_json)
     df = pd.DataFrame.from_records(all_data_json)
     df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)

 from src.display.formatting import has_no_nan_values, make_requests_clickable_model
 from src.display.utils import AutoEvalColumn, EvalQueueColumn, baseline_row
+from src.leaderboard.filter_models import filter_models_flags
 from src.leaderboard.read_evals import get_raw_eval_results
     raw_data = get_raw_eval_results(results_path=results_path, requests_path=requests_path, dynamic_path=dynamic_path)
     all_data_json = [v.to_dict() for v in raw_data]
     all_data_json.append(baseline_row)
+    filter_models_flags(all_data_json)
     df = pd.DataFrame.from_records(all_data_json)
     df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)

src/scripts/update_all_request_files.py CHANGED Viewed

@@ -3,7 +3,8 @@ from huggingface_hub import ModelCard
 import json
 import time
-from src.submission.check_validity import is_model_on_hub, check_model_card
 from src.envs import DYNAMIC_INFO_REPO, DYNAMIC_INFO_PATH, DYNAMIC_INFO_FILE_PATH, API, H4_TOKEN
 def update_models(file_path, models):
@@ -28,45 +29,20 @@ def update_models(file_path, models):
             #data['params'] = get_model_size(model_cfg, data['precision'])
             data['license'] = model_cfg.card_data.license if model_cfg.card_data is not None else ""
-            # Is the model still on the hub
-            still_on_hub, error, model_config = is_model_on_hub(
-                model_name=model_id, revision=data.get("revision"), trust_remote_code=True, test_tokenizer=False, token=H4_TOKEN
             )
             data['still_on_hub'] = still_on_hub
             tags = []
             if still_on_hub:
-                model = model_id
-                modelcard_OK, error_msg = check_model_card(model)
-                model_card = None
-                if modelcard_OK:
-                    model_card = ModelCard.load(model)
-                is_merge_from_metadata = False
-                is_moe_from_metadata = False
-                is_merge_from_model_card = False
-                is_moe_from_model_card = False
-                # Storing the model tags
-                moe_keywords = ["moe", "mixture of experts", "mixtral"]
-                if modelcard_OK:
-                    if model_card.data.tags:
-                        is_merge_from_metadata = "merge" in model_card.data.tags
-                        is_moe_from_metadata = "moe" in model_card.data.tags
-                    merge_keywords = ["mergekit", "merged model", "merge model", "merging"]
-                    # If the model is a merge but not saying it in the metadata, we flag it
-                    is_merge_from_model_card = any(keyword in model_card.text.lower() for keyword in merge_keywords)
-                    if is_merge_from_model_card or is_merge_from_metadata:
-                        tags.append("merge")
-                        if not is_merge_from_metadata:
-                            tags.append("flagged:undisclosed_merge")
-                    is_moe_from_model_card = any(keyword in model_card.text.lower() for keyword in moe_keywords)
-                is_moe_from_name = "moe" in model.lower().replace("/", "-").replace("_", "-").split("-")
-                if is_moe_from_model_card or is_moe_from_name or is_moe_from_metadata:
-                    tags.append("moe")
-                    if not is_moe_from_metadata:
-                        tags.append("flagged:undisclosed_moe")
             data["tags"] = tags
@@ -108,3 +84,4 @@ def update_dynamic_files():
         commit_message=f"Daily request file update.",
     )
     print(f"UPDATE_DYNAMIC: pushed to hub")

 import json
 import time
+from src.submission.check_validity import is_model_on_hub, check_model_card, get_model_tags
 from src.envs import DYNAMIC_INFO_REPO, DYNAMIC_INFO_PATH, DYNAMIC_INFO_FILE_PATH, API, H4_TOKEN
 def update_models(file_path, models):
             #data['params'] = get_model_size(model_cfg, data['precision'])
             data['license'] = model_cfg.card_data.license if model_cfg.card_data is not None else ""
+            # Is the model still on the hub?
+            model_name = model_id
+            if model_cfg.card_data is not None and model_cfg.card_data.base_model is not None:
+                model_name = model_cfg.card_data.base_model # for adapters, we look at the parent model
+            still_on_hub, _, _ = is_model_on_hub(
+                model_name=model_name, revision=data.get("revision"), trust_remote_code=True, test_tokenizer=False, token=H4_TOKEN
             )
             data['still_on_hub'] = still_on_hub
             tags = []
             if still_on_hub:
+                status, _, model_card = check_model_card(model_id)
+                tags = get_model_tags(model_card, model_id)
             data["tags"] = tags
         commit_message=f"Daily request file update.",
     )
     print(f"UPDATE_DYNAMIC: pushed to hub")

src/submission/check_validity.py CHANGED Viewed

@@ -19,7 +19,7 @@ def check_model_card(repo_id: str) -> tuple[bool, str]:
     try:
         card = ModelCard.load(repo_id)
     except huggingface_hub.utils.EntryNotFoundError:
-        return False, "Please add a model card to your model to explain how you trained/fine-tuned it."
     # Enforce license metadata
     if card.data.license is None:
@@ -27,13 +27,13 @@ def check_model_card(repo_id: str) -> tuple[bool, str]:
             return False, (
                 "License not found. Please add a license to your model card using the `license` metadata or a"
                 " `license_name`/`license_link` pair."
-            )
     # Enforce card content
     if len(card.text) < 200:
-        return False, "Please add a description to your model card, it is too short."
-    return True, ""
 def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=TRUST_REMOTE_CODE, test_tokenizer=False) -> tuple[bool, str, AutoConfig]:
@@ -133,3 +133,34 @@ def already_submitted_models(requested_models_dir: str) -> set[str]:
                     users_to_submission_dates[organisation].append(info["submitted_time"])
     return set(file_names), users_to_submission_dates

     try:
         card = ModelCard.load(repo_id)
     except huggingface_hub.utils.EntryNotFoundError:
+        return False, "Please add a model card to your model to explain how you trained/fine-tuned it.", None
     # Enforce license metadata
     if card.data.license is None:
             return False, (
                 "License not found. Please add a license to your model card using the `license` metadata or a"
                 " `license_name`/`license_link` pair."
+            ), None
     # Enforce card content
     if len(card.text) < 200:
+        return False, "Please add a description to your model card, it is too short.", None
+    return True, "", card
 def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=TRUST_REMOTE_CODE, test_tokenizer=False) -> tuple[bool, str, AutoConfig]:
                     users_to_submission_dates[organisation].append(info["submitted_time"])
     return set(file_names), users_to_submission_dates
+def get_model_tags(model_card, model: str):
+    is_merge_from_metadata = False
+    is_moe_from_metadata = False
+    is_merge_from_model_card = False
+    is_moe_from_model_card = False
+    # Storing the model tags
+    tags = []
+    moe_keywords = ["moe", "mixture of experts", "mixtral"]
+    if model_card is not None:
+        if model_card.data.tags:
+            is_merge_from_metadata = "merge" in model_card.data.tags
+            is_moe_from_metadata = "moe" in model_card.data.tags
+        merge_keywords = ["merged model", "merge model"]
+        # If the model card text indicates a merge but the metadata tags do not declare it, we flag it
+        is_merge_from_model_card = any(keyword in model_card.text.lower() for keyword in merge_keywords)
+        if is_merge_from_model_card or is_merge_from_metadata:
+            tags.append("merge")
+            if not is_merge_from_metadata:
+                tags.append("flagged:undisclosed_merge")
+        moe_keywords = ["moe", "mixtral"]
+        is_moe_from_model_card = any(keyword in model_card.text.lower() for keyword in moe_keywords)
+    is_moe_from_name = "moe" in model.lower().replace("/", "-").replace("_", "-").split("-")
+    if is_moe_from_model_card or is_moe_from_name or is_moe_from_metadata:
+        tags.append("moe")
+        # We no longer tag undisclosed MoEs
+        #if not is_moe_from_metadata:
+        #    tags.append("flagged:undisclosed_moe")
+    return tags

src/submission/submit.py CHANGED Viewed

@@ -13,6 +13,7 @@ from src.submission.check_validity import (
     get_model_size,
     is_model_on_hub,
     user_submission_permission,
 )
 REQUESTED_MODELS = None
@@ -97,37 +98,9 @@ def add_new_eval(
         license = None
         #return styled_error("Please select a license for your model")
-    modelcard_OK, error_msg = check_model_card(model)
-    model_card = None
-    if modelcard_OK:
-        model_card = ModelCard.load(model)
-    is_merge_from_metadata = False
-    is_moe_from_metadata = False
-    is_merge_from_model_card = False
-    is_moe_from_model_card = False
-    # Storing the model tags
-    tags = []
-    moe_keywords = ["moe", "mixture of experts", "mixtral"]
-    if modelcard_OK:
-        if model_card.data.tags:
-            is_merge_from_metadata = "merge" in model_card.data.tags
-            is_moe_from_metadata = "moe" in model_card.data.tags
-        merge_keywords = ["mergekit", "merged model", "merge model", "merging"]
-        # If the model is a merge but not saying it in the metadata, we flag it
-        is_merge_from_model_card = any(keyword in model_card.text.lower() for keyword in merge_keywords)
-        if is_merge_from_model_card or is_merge_from_metadata:
-            tags.append("merge")
-            if not is_merge_from_metadata:
-                tags.append("flagged:undisclosed_merge")
-        is_moe_from_model_card = any(keyword in model_card.text.lower() for keyword in moe_keywords)
-    is_moe_from_name = "moe" in model.lower().replace("/", "-").replace("_", "-").split("-")
-    if is_moe_from_model_card or is_moe_from_name or is_moe_from_metadata:
-        tags.append("moe")
-        if not is_moe_from_metadata:
-            tags.append("flagged:undisclosed_moe")
     # Seems good, creating the eval
     print("Adding new eval")

     get_model_size,
     is_model_on_hub,
     user_submission_permission,
+    get_model_tags
 )
 REQUESTED_MODELS = None
         license = None
         #return styled_error("Please select a license for your model")
+    modelcard_OK, error_msg, model_card = check_model_card(model)
+    tags = get_model_tags(model_card, model)
     # Seems good, creating the eval
     print("Adding new eval")

update_dynamic.py ADDED Viewed

	@@ -0,0 +1,4 @@

+from src.scripts.update_all_request_files import update_dynamic_files
+if __name__ == "__main__":
+    update_dynamic_files()