multi-mlx-my-repo

Sleeping

App Files Files Community

reach-vb HF staff

pcuenq HF staff committed on Oct 18

Commit

173d502

•

1 Parent(s): af46da6

pedro-dev (#22)

Browse files

- Fix cache removal (8d4ed6d0cc348fb02dec4066ea6aa71a5431df92)
- Set cache to a directory (383d050be2c87b73393c15023504f8e5e39e31bd)
- Convert (d5eb6e15f92173e93a9b26dbe3a935d3431c3a1c)
- Cleanup (2c3ad17d2dccee44bc09c71183dc58e1f590c06f)
- Apply quant method (dcd5ecd654cf9067d7f1efe272fe9ae6351602e5)

Co-authored-by: Pedro Cuenca <[email protected]>

Files changed (3) hide show

app.py +22 -28
cache/.keep +0 -0
converted/.keep +0 -0

app.py CHANGED Viewed

@@ -1,20 +1,17 @@
 import os
-import shutil
-import subprocess
-import signal
 os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
 import gradio as gr
-from huggingface_hub import create_repo, HfApi
-from huggingface_hub import snapshot_download
 from huggingface_hub import whoami
 from huggingface_hub import ModelCard
-from huggingface_hub import login
 from huggingface_hub import scan_cache_dir
 from huggingface_hub import logging
 from gradio_huggingfacehub_search import HuggingfaceHubSearch
 from apscheduler.schedulers.background import BackgroundScheduler
 from textwrap import dedent
@@ -22,23 +19,24 @@ from textwrap import dedent
 import mlx_lm
 from mlx_lm import convert
-from typing import Any, Callable, Dict, Generator, List, Optional, Tuple, Type, Union
 HF_TOKEN = os.environ.get("HF_TOKEN")
 def clear_hf_cache_space():
     scan = scan_cache_dir()
     to_delete = []
     for repo in scan.repos:
         if repo.repo_type == "model":
-            to_delete.append([rev.commit_hash for rev in repo.revisions])
-    scan.delete_revisions(to_delete)
     print("Cache has been cleared")
 def upload_to_hub(path, upload_repo, hf_path, token):
     card = ModelCard.load(hf_path)
     card.data.tags = ["mlx"] if card.data.tags is None else card.data.tags + ["mlx"]
     card.data.base_model = hf_path
@@ -86,33 +84,29 @@ def upload_to_hub(path, upload_repo, hf_path, token):
     )
     print(f"Upload successful, go to https://huggingface.co/{upload_repo} for details.")
-def process_model(model_id, q_method,oauth_token: gr.OAuthToken | None):
     if oauth_token.token is None:
         raise ValueError("You must be logged in to use MLX-my-repo")
     model_name = model_id.split('/')[-1]
-    print(model_name)
     username = whoami(oauth_token.token)["name"]
-    print(username)
-    # login(token=oauth_token.token, add_to_git_credential=True)
     try:
-        upload_repo = username + "/" + model_name + "-mlx"
         print(upload_repo)
-        convert(model_id, quantize=True)
-        print("Conversion done")
-        upload_to_hub(path="mlx_model", upload_repo=upload_repo, hf_path=repo_id, token=oauth_token.token)
-        print("Upload done")
         return (
-            f'Find your repo <a href=\'{new_repo_url}\' target="_blank" style="text-decoration:underline">here</a>',
             "llama.png",
         )
     except Exception as e:
         return (f"Error: {e}", "error.png")
     finally:
-        shutil.rmtree("mlx_model", ignore_errors=True)
         clear_hf_cache_space()
         print("Folder cleaned up successfully!")

 import os
+import tempfile
+os.environ["HF_HUB_CACHE"] = "cache"
 os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
 import gradio as gr
+from huggingface_hub import HfApi
 from huggingface_hub import whoami
 from huggingface_hub import ModelCard
 from huggingface_hub import scan_cache_dir
 from huggingface_hub import logging
 from gradio_huggingfacehub_search import HuggingfaceHubSearch
 from apscheduler.schedulers.background import BackgroundScheduler
 from textwrap import dedent
 import mlx_lm
 from mlx_lm import convert
 HF_TOKEN = os.environ.get("HF_TOKEN")
+# Maps the quantization option label to the bit width that process_model
+# passes to convert() as q_bits.
+# NOTE(review): only 4-bit and 8-bit are listed; the original author was
+# unsure whether more entries are needed here.
+QUANT_PARAMS = {
+    "Q4": 4,
+    "Q8": 8,
+}
 def clear_hf_cache_space():
     """Delete every cached revision of every model repo found by scan_cache_dir()."""
     cache_info = scan_cache_dir()
     # Flatten the commit hashes of all revisions belonging to model repos;
     # other repo types are left untouched.
     model_revisions = [
         revision.commit_hash
         for cached_repo in cache_info.repos
         if cached_repo.repo_type == "model"
         for revision in cached_repo.revisions
     ]
     # delete_revisions() only builds the deletion plan; execute() applies it.
     deletion_plan = cache_info.delete_revisions(*model_revisions)
     deletion_plan.execute()
     print("Cache has been cleared")
 def upload_to_hub(path, upload_repo, hf_path, token):
     card = ModelCard.load(hf_path)
     card.data.tags = ["mlx"] if card.data.tags is None else card.data.tags + ["mlx"]
     card.data.base_model = hf_path
     )
     print(f"Upload successful, go to https://huggingface.co/{upload_repo} for details.")
+def process_model(model_id, q_method, oauth_token: gr.OAuthToken | None):
     if oauth_token.token is None:
         raise ValueError("You must be logged in to use MLX-my-repo")
     model_name = model_id.split('/')[-1]
     username = whoami(oauth_token.token)["name"]
     try:
+        upload_repo = f"{username}/{model_name}-{q_method}-mlx"
         print(upload_repo)
+        with tempfile.TemporaryDirectory(dir="converted") as tmpdir:
+            # The target dir must not exist
+            mlx_path = os.path.join(tmpdir, "mlx")
+            convert(model_id, mlx_path=mlx_path, quantize=True, q_bits=QUANT_PARAMS[q_method])
+            print("Conversion done")
+            upload_to_hub(path=mlx_path, upload_repo=upload_repo, hf_path=model_id, token=oauth_token.token)
+            print("Upload done")
         return (
+            f'Find your repo <a href="https://hf.co/{upload_repo}" target="_blank" style="text-decoration:underline">here</a>',
             "llama.png",
         )
     except Exception as e:
         return (f"Error: {e}", "error.png")
     finally:
         clear_hf_cache_space()
         print("Folder cleaned up successfully!")

cache/.keep ADDED Viewed

File without changes

converted/.keep ADDED Viewed

File without changes