bstraehle committed on
Commit cbbb9fd
1 Parent(s): 0f87f9e

Update app.py

Files changed (1)
  app.py +23 -95
app.py CHANGED
@@ -1,102 +1,30 @@
-import torch
-from huggingface_hub import login
-from datasets import load_dataset
-from peft import LoraConfig
-from transformers import TrainingArguments
-from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
-from trl import SFTTrainer
+import os
+from huggingface_hub import HfApi, login
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
 def process(model_id, dataset):
-    print("111")
-    login(
-        token=os.environ.get("HF_TOKEN"),
-        add_to_git_credential=True
-    )
-    # Load Dolly Dataset.
-    print("222")
-    dataset = load_dataset("philschmid/dolly-15k-oai-style", split="train")
-
-    print(dataset[3]["messages"])
-
-    # Hugging Face model id
-    print("333")
-    model_id = "google/gemma-7b"
-    tokenizer_id = "philschmid/gemma-tokenizer-chatml"
-
-    # BitsAndBytesConfig int-4 config
-    print("444")
-    bnb_config = BitsAndBytesConfig(
-        load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
-    )
-
-    # Load model and tokenizer
-    print("444")
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id,
-        device_map="auto",
-        attn_implementation="flash_attention_2",
-        torch_dtype=torch.bfloat16,
-        quantization_config=bnb_config
-    )
-    tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
-    tokenizer.padding_side = 'right' # to prevent warnings
-
-    # LoRA config based on QLoRA paper & Sebastian Raschka experiment
-    print("555")
-    peft_config = LoraConfig(
-        lora_alpha=8,
-        lora_dropout=0.05,
-        r=6,
-        bias="none",
-        target_modules="all-linear",
-        task_type="CAUSAL_LM",
-    )
-
-    print("666")
-    args = TrainingArguments(
-        output_dir="gemma-7b-dolly-chatml",  # directory to save and repository id
-        num_train_epochs=3,                  # number of training epochs
-        per_device_train_batch_size=2,       # batch size per device during training
-        gradient_accumulation_steps=2,       # number of steps before performing a backward/update pass
-        gradient_checkpointing=True,         # use gradient checkpointing to save memory
-        optim="adamw_torch_fused",           # use fused adamw optimizer
-        logging_steps=10,                    # log every 10 steps
-        save_strategy="epoch",               # save checkpoint every epoch
-        bf16=True,                           # use bfloat16 precision
-        tf32=True,                           # use tf32 precision
-        learning_rate=2e-4,                  # learning rate, based on QLoRA paper
-        max_grad_norm=0.3,                   # max gradient norm based on QLoRA paper
-        warmup_ratio=0.03,                   # warmup ratio based on QLoRA paper
-        lr_scheduler_type="constant",        # use constant learning rate scheduler
-        push_to_hub=False,                   # push model to hub
-        report_to="tensorboard",             # report metrics to tensorboard
-    )
-
-    print("777")
-    max_seq_length = 1512 # max sequence length for model and packing of the dataset
-
-    trainer = SFTTrainer(
-        model=model,
-        args=args,
-        train_dataset=dataset,
-        peft_config=peft_config,
-        max_seq_length=max_seq_length,
-        tokenizer=tokenizer,
-        packing=True,
-        dataset_kwargs={
-            "add_special_tokens": False,  # We template with special tokens
-            "append_concat_token": False, # No need to add additional separator token
-        }
+    # Download Sample Model from Hugging Face to Publish Again
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
+    # Local Path of Model
+    model_path = 't5-fine-tune-save-example'
+    model.save_pretrained(model_path)
+    login(token=os.environ["HF_TOKEN"])
+    api = HfApi()
+    model_repo_name = f"bstraehle/{model_id}"
+
+    #Create Repo in Hugging Face
+    api.create_repo(repo_id=model_repo_name)
+
+    #Upload Model folder from Local to HuggingFace
+    api.upload_folder(
+        folder_path=model_path,
+        repo_id=model_repo_name
     )
-
-    # start training, the model will be automatically saved to the hub and the output directory
-    print("888")
-    trainer.train()
-
-    # save model
-    print("999")
-    trainer.save_model()
+
+    # Publish Model Tokenizer on Hugging Face
+    tokenizer.push_to_hub(model_repo_name)
 
     return "Done"
 
 demo = gr.Interface(fn=process,
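
For reference, the new app.py boils down to the short sketch below. The hunk cuts off after fn=process, so the import gradio as gr line, the Interface inputs/outputs, and the demo.launch() call are assumptions rather than committed code; exist_ok=True is likewise an added convenience so create_repo does not fail when the target repo already exists.

# Minimal sketch of the new app.py; the Gradio wiring and exist_ok are assumptions.
import os

import gradio as gr
from huggingface_hub import HfApi, login
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

def process(model_id, dataset):
    # Download the model and tokenizer so they can be re-published.
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

    # Save the model locally, then authenticate against the Hub.
    model_path = "t5-fine-tune-save-example"
    model.save_pretrained(model_path)
    login(token=os.environ["HF_TOKEN"])

    # Create the target repo (exist_ok=True is an assumption, not in the
    # commit) and upload the saved folder into it.
    api = HfApi()
    model_repo_name = f"bstraehle/{model_id}"
    api.create_repo(repo_id=model_repo_name, exist_ok=True)
    api.upload_folder(folder_path=model_path, repo_id=model_repo_name)

    # Push the tokenizer files to the same repo.
    tokenizer.push_to_hub(model_repo_name)
    return "Done"

# Hypothetical UI wiring: the committed inputs/outputs are not shown in the diff.
demo = gr.Interface(fn=process,
                    inputs=[gr.Textbox(label="Model ID"), gr.Textbox(label="Dataset")],
                    outputs=gr.Textbox(label="Output"))
demo.launch()

Note that f"bstraehle/{model_id}" assumes a bare model id such as t5-small; an id that already contains a namespace (for example google/flan-t5-small) would produce an invalid nested repo id.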