sft / app.py
bstraehle's picture
Update app.py
7465957 verified
raw
history blame
1.6 kB
import gradio as gr
import os
from datasets import load_dataset
from huggingface_hub import HfApi, login
from transformers import AutoTokenizer, AutoModelForCausalLM
# Run on NVIDIA A10G Large (sleep after 1 hour)
# Model IDs:
#
# google/gemma-2-9b-it
# meta-llama/Meta-Llama-3-8B-Instruct
# Datasets:
#
# gretelai/synthetic_text_to_sql
# Hub namespace used as the owner prefix of the repo ID built in upload_model.
profile = "bstraehle"
def download_model(model_id):
    """Load the tokenizer and causal-LM for ``model_id`` and save the model locally.

    The model is written with ``save_pretrained`` to a path equal to
    ``model_id``. The tokenizer is not saved here; it is handed back to the
    caller instead.

    Args:
        model_id: Identifier passed to ``from_pretrained`` for both the
            tokenizer and the model.

    Returns:
        The loaded tokenizer.
    """
    # Tokenizer is loaded first, then the model, matching the original order.
    tok = AutoTokenizer.from_pretrained(model_id)
    AutoModelForCausalLM.from_pretrained(model_id).save_pretrained(model_id)
    return tok
def download_dataset(dataset):
    """Load ``dataset`` with ``load_dataset`` and return an empty string.

    The loaded dataset object is not kept; only the call itself is made.

    Args:
        dataset: Identifier passed to ``load_dataset``.

    Returns:
        An empty string.
    """
    load_dataset(dataset)
    return ""
def fine_tune_model():
    """Placeholder for the fine-tuning step; currently does no work.

    Returns:
        An empty string.
    """
    placeholder = ""
    return placeholder
def upload_model(model_id, tokenizer):
    """Upload the local model folder and the tokenizer to the Hugging Face Hub.

    The target repository ID is ``<profile>/<model name>``, where the model
    name is the part of ``model_id`` after its last ``/`` (or all of
    ``model_id`` when it contains no ``/``).

    Args:
        model_id: Source model ID; also used as the local folder path whose
            contents are uploaded.
        tokenizer: Object exposing ``push_to_hub``; pushed to the same repo.

    Returns:
        The repository ID that was uploaded to.

    Raises:
        KeyError: If the ``HF_TOKEN`` environment variable is not set.
    """
    model_name = model_id.rsplit("/", 1)[-1]
    model_repo_name = f"{profile}/{model_name}"

    login(token=os.environ["HF_TOKEN"])

    api = HfApi()
    # exist_ok=True lets repeated runs for the same model reuse the existing
    # repo instead of failing because it was already created.
    api.create_repo(repo_id=model_repo_name, exist_ok=True)
    api.upload_folder(
        folder_path=model_id,
        repo_id=model_repo_name,
    )

    tokenizer.push_to_hub(model_repo_name)

    return model_repo_name
def process(model_id, dataset):
    """Run the download-then-upload pipeline for ``model_id``.

    Args:
        model_id: Model identifier forwarded to ``download_model`` and
            ``upload_model``.
        dataset: Accepted as an input but not used by this function.

    Returns:
        The value returned by ``upload_model``.
    """
    return upload_model(model_id, download_model(model_id))
# UI wiring: two single-line text inputs feed `process`; its return value is
# shown in one output text box.
demo = gr.Interface(
    fn=process,
    inputs=[
        gr.Textbox(
            label="Model ID",
            value="meta-llama/Meta-Llama-3-8B-Instruct",
            lines=1,
        ),
        gr.Textbox(
            label="Dataset",
            value="gretelai/synthetic_text_to_sql",
            lines=1,
        ),
    ],
    outputs=[gr.Textbox(label="Completion")],
)

demo.launch()