Spaces:

onnx
/

export

Running on CPU Upgrade

App Files Files Community

Felix Marty committed on Nov 3, 2022

Commit

f75daf5

•

1 Parent(s): 89d7a1e

add sketch

Browse files

Files changed (3) hide show

app.py +84 -4
onnx_export.py +132 -0
requirements.txt +3 -0

app.py CHANGED Viewed

@@ -1,7 +1,87 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-iface.launch()

+import csv
+import datetime
+import os
+from typing import Optional
 import gradio as gr
+from onnx_export import convert
+from huggingface_hub import HfApi, Repository
+# Dataset repository that is cloned locally, and the path of the CSV file
+# inside that clone to which `onnx_export` appends one row per conversion.
+DATASET_REPO_URL = "https://huggingface.co/datasets/safetensors/conversions"
+DATA_FILENAME = "data.csv"
+DATA_FILE = os.path.join("data", DATA_FILENAME)
+# The clone is optional: without an HF_TOKEN in the environment nothing is
+# cloned and `repo` stays None.
+HF_TOKEN = os.environ.get("HF_TOKEN")
+repo: Optional[Repository] = (
+    Repository(local_dir="data", clone_from=DATASET_REPO_URL, token=HF_TOKEN)
+    if HF_TOKEN
+    else None
+)
+def onnx_export(token: str, model_id: str, task: str) -> str:
+    """Convert a Hub model to ONNX and report the outcome as Markdown.
+
+    Args:
+        token: Hugging Face token used to build the `HfApi` client.
+        model_id: Id of the model repository to convert.
+        task: Task name forwarded to `convert`.
+
+    Returns:
+        A Markdown snippet: an "Invalid input" notice when the token or the
+        model id is empty, a "Success" message linking to the pull request,
+        or an "Error" message carrying the text of any exception raised.
+    """
+    if token == "" or model_id == "":
+        return """
+        ### Invalid input 🐞
+        Please fill a token and model_id.
+        """
+    try:
+        api = HfApi(token=token)
+        commit_info = convert(api=api, model_id=model_id, task=task)
+        print("[commit_info]", commit_info)
+        # `convert` is annotated as returning an Optional value; without this
+        # guard a None result would surface as an opaque AttributeError on
+        # `commit_info.pr_url` below.
+        if commit_info is None:
+            raise Exception(f"No pull request was opened for {model_id}.")
+        # save in a private dataset:
+        if repo is not None:
+            repo.git_pull(rebase=True)
+            # newline="" is what the csv module asks for on files it writes.
+            with open(DATA_FILE, "a", newline="") as csvfile:
+                writer = csv.DictWriter(
+                    csvfile, fieldnames=["model_id", "pr_url", "time"]
+                )
+                writer.writerow(
+                    {
+                        "model_id": model_id,
+                        "pr_url": commit_info.pr_url,
+                        # `datetime` is imported as a module in this file, so
+                        # the class is reached as `datetime.datetime`.
+                        "time": str(datetime.datetime.now()),
+                    }
+                )
+            commit_url = repo.push_to_hub()
+            print("[dataset]", commit_url)
+        return f"""
+        ### Success 🔥
+        Yay! This model was successfully converted and a PR was open using your token, here:
+        [{commit_info.pr_url}]({commit_info.pr_url})
+        """
+    except Exception as e:
+        # Top-level UI boundary: every failure is shown to the user as text.
+        return f"""
+        ### Error 😢😢
+        {e}
+        """
+# Text passed to the interface as its `description`.
+DESCRIPTION = """
+The steps are the following:
+- Paste a read-access token from hf.co/settings/tokens. Read access is enough given that we will open a PR against the source repo.
+- Input a model id from the Hub
+- If necessary, input the task for this model.
+- Click "Convert to ONNX"
+- That's it! You'll get feedback if it works or not, and if it worked, you'll get the URL of the opened PR!
+"""
+demo = gr.Interface(
+    # The title names ONNX, the format this app actually exports to.
+    title="Convert any model to ONNX and open a PR",
+    description=DESCRIPTION,
+    allow_flagging="never",
+    article="Check out the [Optimum repo on GitHub](https://github.com/huggingface/optimum)",
+    # Inputs are passed positionally to `onnx_export`: token, model_id, task.
+    inputs=[
+        gr.Text(max_lines=1, label="your_hf_token"),
+        gr.Text(max_lines=1, label="model_id"),
+        gr.Text(max_lines=1, label="task")
+    ],
+    outputs=[gr.Markdown(label="output")],
+    fn=onnx_export,
+)
+demo.launch()

onnx_export.py ADDED Viewed

	@@ -0,0 +1,132 @@

+from optimum.exporters.tasks import TasksManager
+from optimum.exporters.onnx import OnnxConfigWithPast, export, validate_model_outputs
+from tempfile import TemporaryDirectory
+from transformers import AutoConfig, is_torch_available
+from transformers import AutoConfig
+from pathlib import Path
+import os
+import shutil
+import argparse
+from typing import Optional
+from huggingface_hub import CommitOperationAdd, HfApi, hf_hub_download, get_repo_discussions
+from huggingface_hub.file_download import repo_folder_name
+def previous_pr(api: "HfApi", model_id: str, pr_title: str) -> Optional["Discussion"]:
+    """Return the open pull request of `model_id` titled `pr_title`, if any.
+
+    The lookup is best-effort: when the discussions cannot be requested,
+    None is returned, exactly as when no discussion matches.
+    """
+    try:
+        candidates = api.get_repo_discussions(repo_id=model_id)
+    except Exception:
+        return None
+    # Lazily scan the discussions and stop at the first one that matches.
+    matches = (
+        candidate
+        for candidate in candidates
+        if candidate.status == "open"
+        and candidate.is_pull_request
+        and candidate.title == pr_title
+    )
+    return next(matches, None)
+def convert_onnx(model_id: str, task: str, folder: str):
+    """Export `model_id` to ONNX inside `folder` and list the files to upload.
+
+    Args:
+        model_id: Id of the model on the Hub.
+        task: Task used to pick the model class and the ONNX config.
+        folder: Existing directory that receives the exported file(s).
+
+    Returns:
+        A list with one `CommitOperationAdd` per file found in `folder`
+        after the export; an empty list when the export is skipped.
+
+    Raises:
+        RuntimeError: If PyTorch is not installed, or is not supported by
+            the selected ONNX config.
+    """
+    model_class = TasksManager.get_model_class_for_task(task)
+    config = AutoConfig.from_pretrained(model_id)
+    # Load the trained weights: `from_config` would build a randomly
+    # initialised model, which is not what should be exported and uploaded.
+    model = model_class.from_pretrained(model_id, config=config)
+    device = "cpu"  # ?
+    # Dynamic axes aren't supported for YOLO-like models. This means they cannot be exported to ONNX on CUDA devices.
+    # See: https://github.com/ultralytics/yolov5/pull/8378
+    if model.__class__.__name__.startswith("Yolos") and device != "cpu":
+        return []
+    onnx_config_class_constructor = TasksManager.get_exporter_config_constructor(
+        model_type=config.model_type, exporter="onnx", task=task, model_name=model_id
+    )
+    onnx_config = onnx_config_class_constructor(model.config)
+    # We need to set this to some value to be able to test the outputs values for batch size > 1.
+    if (
+        isinstance(onnx_config, OnnxConfigWithPast)
+        and getattr(model.config, "pad_token_id", None) is None
+        and task == "sequence-classification"
+    ):
+        model.config.pad_token_id = 0
+    # Everything below applies to every model, not only to the ones that
+    # needed the pad-token workaround above.
+    if not is_torch_available():
+        raise RuntimeError("PyTorch is required to export a model to ONNX.")
+    from optimum.exporters.onnx.utils import TORCH_VERSION
+    if not onnx_config.is_torch_support_available:
+        # Stop here instead of attempting an export that is known to be
+        # unsupported.
+        raise RuntimeError(
+            "Incompatible PyTorch version. Minimum required is"
+            f" {onnx_config.MIN_TORCH_VERSION}, got: {TORCH_VERSION}"
+        )
+    output_dir = Path(folder)
+    # The exporter writes to a file path; "model.onnx" is the file name that
+    # `convert` looks for in the repository.
+    onnx_path = output_dir / "model.onnx"
+    _, onnx_outputs = export(
+        model, onnx_config, onnx_config.DEFAULT_ONNX_OPSET, onnx_path, device=device
+    )
+    atol = onnx_config.ATOL_FOR_VALIDATION
+    if isinstance(atol, dict):
+        atol = atol[task.replace("-with-past", "")]
+    validate_model_outputs(
+        onnx_config,
+        model,
+        onnx_path,
+        onnx_outputs,
+        atol,
+    )
+    # Upload every file the export left in the folder.
+    return [
+        CommitOperationAdd(path_in_repo=path.name, path_or_fileobj=str(path))
+        for path in sorted(output_dir.iterdir())
+        if path.is_file()
+    ]
+def convert(api: "HfApi", model_id: str, task: str, force: bool = False) -> Optional["CommitInfo"]:
+    """Export `model_id` to ONNX and open a pull request with the result.
+
+    Args:
+        api: Hub client used to read the repository and to create the PR.
+        model_id: Id of the model repository to convert.
+        task: Task name forwarded to `convert_onnx`.
+        force: Convert even if the repository already contains
+            "model.onnx" or already has an open PR with the same title.
+
+    Returns:
+        The value returned by `api.create_commit(..., create_pr=True)`.
+
+    Raises:
+        Exception: If the model is already converted, an open PR already
+            exists (both unless `force`), or the export produced no file.
+    """
+    pr_title = "Adding ONNX file of this model"
+    info = api.model_info(model_id)
+    filenames = {s.rfilename for s in info.siblings}
+    with TemporaryDirectory() as d:
+        folder = os.path.join(d, repo_folder_name(repo_id=model_id, repo_type="models"))
+        os.makedirs(folder)
+        try:
+            pr = previous_pr(api, model_id, pr_title)
+            if "model.onnx" in filenames and not force:
+                raise Exception(f"Model {model_id} is already converted, skipping..")
+            if pr is not None and not force:
+                url = f"https://huggingface.co/{model_id}/discussions/{pr.num}"
+                raise Exception(f"Model {model_id} already has an open PR check out {url}")
+            operations = convert_onnx(model_id, task, folder)
+            if not operations:
+                raise Exception(f"No ONNX file was produced for {model_id}, nothing to upload.")
+            # The upload has to happen while the exported files still exist,
+            # i.e. before the cleanup in `finally`.
+            new_pr = api.create_commit(
+                repo_id=model_id,
+                operations=operations,
+                commit_message=pr_title,
+                create_pr=True,
+            )
+        finally:
+            shutil.rmtree(folder)
+        return new_pr
+# Command-line entry point: parse the model id, task and --force flag, then
+# run `convert` with a default `HfApi` client.
+if __name__ == "__main__":
+    DESCRIPTION = """
+    Simple utility tool to convert automatically a model on the hub to onnx format.
+    It is PyTorch exclusive for now.
+    It works by downloading the weights (PT), converting them locally, and uploading them back
+    as a PR on the hub.
+    """
+    parser = argparse.ArgumentParser(description=DESCRIPTION)
+    parser.add_argument(
+        "model_id",
+        type=str,
+        help="The name of the model on the hub to convert. E.g. `gpt2` or `facebook/wav2vec2-base-960h`",
+    )
+    parser.add_argument(
+        "task",
+        type=str,
+        help="The task the model is performing",
+    )
+    parser.add_argument(
+        "--force",
+        action="store_true",
+        help="Create the PR even if it already exists or if the model was already converted.",
+    )
+    args = parser.parse_args()
+    api = HfApi()
+    convert(api, args.model_id, task=args.task, force=args.force)

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+torch
+transformers
+git+https://github.com/huggingface/optimum.git#egg=optimum[onnxruntime]