bstraehle committed on
Commit cbbb9fd
1 Parent(s): 0f87f9e

Update app.py

Files changed (1)
  app.py +23 -95
app.py CHANGED
@@ -1,102 +1,30 @@
-import torch
-from huggingface_hub import login
-from datasets import load_dataset
-from peft import LoraConfig
-from transformers import TrainingArguments
-from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
-from trl import SFTTrainer
+import os
+from huggingface_hub import HfApi, login
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
 def process(model_id, dataset):
-    print("111")
-    login(
-        token=os.environ.get("HF_TOKEN"),
-        add_to_git_credential=True
-    )
-    # Load Dolly Dataset.
-    print("222")
-    dataset = load_dataset("philschmid/dolly-15k-oai-style", split="train")
-
-    print(dataset[3]["messages"])
-
-    # Hugging Face model id
-    print("333")
-    model_id = "google/gemma-7b"
-    tokenizer_id = "philschmid/gemma-tokenizer-chatml"
-
-    # BitsAndBytesConfig int-4 config
-    print("444")
-    bnb_config = BitsAndBytesConfig(
-        load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
-    )
-
-    # Load model and tokenizer
-    print("444")
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id,
-        device_map="auto",
-        attn_implementation="flash_attention_2",
-        torch_dtype=torch.bfloat16,
-        quantization_config=bnb_config
-    )
-    tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
-    tokenizer.padding_side = 'right' # to prevent warnings
-
-    # LoRA config based on QLoRA paper & Sebastian Raschka experiment
-    print("555")
-    peft_config = LoraConfig(
-        lora_alpha=8,
-        lora_dropout=0.05,
-        r=6,
-        bias="none",
-        target_modules="all-linear",
-        task_type="CAUSAL_LM",
-    )
-
-    print("666")
-    args = TrainingArguments(
-        output_dir="gemma-7b-dolly-chatml",  # directory to save and repository id
-        num_train_epochs=3,                  # number of training epochs
-        per_device_train_batch_size=2,       # batch size per device during training
-        gradient_accumulation_steps=2,       # number of steps before performing a backward/update pass
-        gradient_checkpointing=True,         # use gradient checkpointing to save memory
-        optim="adamw_torch_fused",           # use fused adamw optimizer
-        logging_steps=10,                    # log every 10 steps
-        save_strategy="epoch",               # save checkpoint every epoch
-        bf16=True,                           # use bfloat16 precision
-        tf32=True,                           # use tf32 precision
-        learning_rate=2e-4,                  # learning rate, based on QLoRA paper
-        max_grad_norm=0.3,                   # max gradient norm based on QLoRA paper
-        warmup_ratio=0.03,                   # warmup ratio based on QLoRA paper
-        lr_scheduler_type="constant",        # use constant learning rate scheduler
-        push_to_hub=False,                   # push model to hub
-        report_to="tensorboard",             # report metrics to tensorboard
-    )
-
-    print("777")
-    max_seq_length = 1512 # max sequence length for model and packing of the dataset
-
-    trainer = SFTTrainer(
-        model=model,
-        args=args,
-        train_dataset=dataset,
-        peft_config=peft_config,
-        max_seq_length=max_seq_length,
-        tokenizer=tokenizer,
-        packing=True,
-        dataset_kwargs={
-            "add_special_tokens": False,  # We template with special tokens
-            "append_concat_token": False, # No need to add additional separator token
-        }
+    # Download Sample Model from Hugging Face to Publish Again
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
+    # Local Path of Model
+    model_path = 't5-fine-tune-save-example'
+    model.save_pretrained(model_path)
+    login(token=os.environ["HF_TOKEN"])
+    api = HfApi()
+    model_repo_name = f"bstraehle/{model_id}"
+
+    #Create Repo in Hugging Face
+    api.create_repo(repo_id=model_repo_name)
+
+    #Upload Model folder from Local to HuggingFace
+    api.upload_folder(
+        folder_path=model_path,
+        repo_id=model_repo_name
     )
-
-    # start training, the model will be automatically saved to the hub and the output directory
-    print("888")
-    trainer.train()
-
-    # save model
-    print("999")
-    trainer.save_model()
+
+    # Publish Model Tokenizer on Hugging Face
+    tokenizer.push_to_hub(model_repo_name)
 
     return "Done"
 
 demo = gr.Interface(fn=process,
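
For reference, the new app.py boils down to the short sketch below. The hunk cuts off after fn=process, so the import gradio as gr line, the Interface inputs/outputs, and the demo.launch() call are assumptions rather than committed code; exist_ok=True is likewise an added convenience so create_repo does not fail when the target repo already exists.

# Minimal sketch of the new app.py; the Gradio wiring and exist_ok are assumptions.
import os

import gradio as gr
from huggingface_hub import HfApi, login
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

def process(model_id, dataset):
    # Download the model and tokenizer so they can be re-published.
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

    # Save the model locally, then authenticate against the Hub.
    model_path = "t5-fine-tune-save-example"
    model.save_pretrained(model_path)
    login(token=os.environ["HF_TOKEN"])

    # Create the target repo (exist_ok=True is an assumption, not in the
    # commit) and upload the saved folder into it.
    api = HfApi()
    model_repo_name = f"bstraehle/{model_id}"
    api.create_repo(repo_id=model_repo_name, exist_ok=True)
    api.upload_folder(folder_path=model_path, repo_id=model_repo_name)

    # Push the tokenizer files to the same repo.
    tokenizer.push_to_hub(model_repo_name)
    return "Done"

# Hypothetical UI wiring: the committed inputs/outputs are not shown in the diff.
demo = gr.Interface(fn=process,
                    inputs=[gr.Textbox(label="Model ID"), gr.Textbox(label="Dataset")],
                    outputs=gr.Textbox(label="Output"))
demo.launch()

Note that f"bstraehle/{model_id}" assumes a bare model id such as t5-small; an id that already contains a namespace (for example google/flan-t5-small) would produce an invalid nested repo id.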