# https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/sample_finetune.py
import gradio as gr
import os, sys, logging
import datasets, torch, transformers
from datasets import load_dataset
from huggingface_hub import HfApi, login
from peft import LoraConfig
#from peft import AutoPeftModelForCausalLM  # only needed by the commented-out PEFT inference snippet below
#from random import randint                 # only needed by the commented-out evaluation snippet below
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig, pipeline
from trl import SFTTrainer, setup_chat_format
logger = logging.getLogger(__name__)  # used by the logging setup in test()
# Fine-tune on NVIDIA 4xL4 (sleep after 10 hours)
hf_profile = "bstraehle"
action_1 = "Fine-tune pre-trained model"
action_2 = "Prompt fine-tuned model"
system_prompt = "You are a text to SQL query translator. Given a question in English, generate a SQL query based on the provided SCHEMA. Do not generate any additional text. SCHEMA: {schema}"
user_prompt = "What is the total trade value and average price for each trader and stock in the trade_history table?"
schema = "CREATE TABLE trade_history (id INT, trader_id INT, stock VARCHAR(255), price DECIMAL(5,2), quantity INT, trade_time TIMESTAMP);"
base_model_id = "microsoft/Phi-3-mini-4k-instruct"
dataset = "b-mc2/sql-create-context"
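# Prompt a model (base or fine-tuned) through the transformers text-generation pipeline:
# the system prompt is formatted with the SQL schema and the generated SQL query is returned.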
def prompt_model(model_id, system_prompt, user_prompt, schema):
pipe = pipeline("text-generation",
model=model_id,
model_kwargs={"torch_dtype": torch.bfloat16},
device_map="auto",
max_new_tokens=1000)
messages = [
{"role": "system", "content": system_prompt.format(schema=schema)},
{"role": "user", "content": user_prompt},
{"role": "assistant", "content": ""}
]
output = pipe(messages)
result = output[0]["generated_text"][-1]["content"]
print(result)
return result
# peft_model_id = "./code-llama-7b-text-to-sql"
# # peft_model_id = args.output_dir
# # Load Model with PEFT adapter
# model = AutoPeftModelForCausalLM.from_pretrained(
# peft_model_id,
# device_map="auto",
# torch_dtype=torch.float16
# )
# tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
# # load into pipeline
# pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
###
# eval_dataset = load_dataset("json", data_files="test_dataset.json", split="train")
# rand_idx = randint(0, len(eval_dataset))
# # Test on sample
# prompt = pipe.tokenizer.apply_chat_template(eval_dataset[rand_idx]["messages"][:2], tokenize=False, add_generation_prompt=True)
# outputs = pipe(prompt, max_new_tokens=256, do_sample=False, temperature=0.1, top_k=50, top_p=0.1, eos_token_id=pipe.tokenizer.eos_token_id, pad_token_id=pipe.tokenizer.pad_token_id)
# print(f"Query:\n{eval_dataset[rand_idx]['messages'][1]['content']}")
# print(f"Original Answer:\n{eval_dataset[rand_idx]['messages'][2]['content']}")
# print(f"Generated Answer:\n{outputs[0]['generated_text'][len(prompt):].strip()}")
def fine_tune_model(base_model_id, dataset):
test(base_model_id, dataset)
##tokenizer = download_model(base_model_id)
#prepare_dataset(dataset)
#train_model(base_model_id)
##fine_tuned_model_id = upload_model(base_model_id, tokenizer)
return "fine_tuned_model_id"
def create_conversation(sample):
return {
"messages": [
{"role": "system", "content": system_prompt.format(schema=sample["context"])},
{"role": "user", "content": sample["question"]},
{"role": "assistant", "content": sample["answer"]}
]
}
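# Illustration only (hypothetical sample values):
# create_conversation({"context": schema, "question": user_prompt, "answer": "SELECT ..."})
# returns {"messages": [{"role": "system", ...}, {"role": "user", ...}, {"role": "assistant", ...}]}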
# Formatting function for ShareGPT-style "conversations" (not called in this app; kept for
# reference, and it relies on a module-level tokenizer being defined)
def formatting_prompts_func(examples):
convos = examples["conversations"]
texts = []
mapper = {"system": "system\n", "human": "\nuser\n", "gpt": "\nassistant\n"}
end_mapper = {"system": "", "human": "", "gpt": ""}
for convo in convos:
text = "".join(f"{mapper[(turn := x['from'])]} {x['value']}\n{end_mapper[turn]}" for x in convo)
texts.append(f"{text}{tokenizer.eos_token}")
return {"text": texts}
def test(base_model_id, dataset):
###################
# Hyper-parameters
###################
training_config = {
"bf16": True,
"do_eval": False,
"learning_rate": 5.0e-06,
"log_level": "info",
"logging_steps": 20,
"logging_strategy": "steps",
"lr_scheduler_type": "cosine",
"num_train_epochs": 1,
"max_steps": -1,
"output_dir": "./checkpoint_dir",
"overwrite_output_dir": True,
"per_device_eval_batch_size": 4,
"per_device_train_batch_size": 4,
"remove_unused_columns": True,
"save_steps": 100,
"save_total_limit": 1,
"seed": 0,
"gradient_checkpointing": True,
"gradient_checkpointing_kwargs":{"use_reentrant": False},
"gradient_accumulation_steps": 1,
"warmup_ratio": 0.2,
}
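    # Effective train batch size = per_device_train_batch_size * gradient_accumulation_steps * n_gpus,
    # e.g. 4 * 1 * 4 = 16 on the 4x L4 setup noted at the top of this file.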
peft_config = {
"r": 16,
"lora_alpha": 32,
"lora_dropout": 0.05,
"bias": "none",
"task_type": "CAUSAL_LM",
"target_modules": "all-linear",
"modules_to_save": None,
}
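    # LoRA adapts all linear projections (r=16, alpha=32); only the adapter weights are
    # trained while the base Phi-3 weights stay frozen.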
train_conf = TrainingArguments(**training_config)
peft_conf = LoraConfig(**peft_config)
###############
# Setup logging
###############
logging.basicConfig(
format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
handlers=[logging.StreamHandler(sys.stdout)],
)
log_level = train_conf.get_process_log_level()
logger.setLevel(log_level)
datasets.utils.logging.set_verbosity(log_level)
transformers.utils.logging.set_verbosity(log_level)
transformers.utils.logging.enable_default_handler()
transformers.utils.logging.enable_explicit_format()
# Log on each process a small summary
logger.warning(
f"Process rank: {train_conf.local_rank}, device: {train_conf.device}, n_gpu: {train_conf.n_gpu}"
+ f" distributed training: {bool(train_conf.local_rank != -1)}, 16-bits training: {train_conf.fp16}"
)
logger.info(f"Training/evaluation parameters {train_conf}")
logger.info(f"PEFT parameters {peft_conf}")
################
# Model Loading
################
checkpoint_path = "microsoft/Phi-3-mini-4k-instruct"
# checkpoint_path = "microsoft/Phi-3-mini-128k-instruct"
model_kwargs = dict(
use_cache=False,
trust_remote_code=True,
        attn_implementation="flash_attention_2",  # load the model with flash-attention 2 support
torch_dtype=torch.bfloat16,
device_map=None
)
model = AutoModelForCausalLM.from_pretrained(checkpoint_path, **model_kwargs)
tokenizer = AutoTokenizer.from_pretrained(checkpoint_path)
tokenizer.model_max_length = 2048
tokenizer.pad_token = tokenizer.unk_token # use unk rather than eos token to prevent endless generation
tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
tokenizer.padding_side = 'right'
##################
# Data Processing
##################
def apply_chat_template(
example,
tokenizer,
):
messages = example["messages"]
example["text"] = tokenizer.apply_chat_template(
messages, tokenize=False, add_generation_prompt=False)
return example
raw_dataset = load_dataset("HuggingFaceH4/ultrachat_200k")
train_dataset = raw_dataset["train_sft"]
test_dataset = raw_dataset["test_sft"]
column_names = list(train_dataset.features)
processed_train_dataset = train_dataset.map(
apply_chat_template,
fn_kwargs={"tokenizer": tokenizer},
num_proc=10,
remove_columns=column_names,
desc="Applying chat template to train_sft",
)
processed_test_dataset = test_dataset.map(
apply_chat_template,
fn_kwargs={"tokenizer": tokenizer},
num_proc=10,
remove_columns=column_names,
desc="Applying chat template to test_sft",
)
###########
# Training
###########
trainer = SFTTrainer(
model=model,
args=train_conf,
peft_config=peft_conf,
train_dataset=processed_train_dataset,
eval_dataset=processed_test_dataset,
max_seq_length=2048,
dataset_text_field="text",
tokenizer=tokenizer,
packing=True
)
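    # packing=True concatenates multiple chat-templated examples into fixed
    # max_seq_length (2048-token) sequences so less compute is spent on padding.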
train_result = trainer.train()
metrics = train_result.metrics
trainer.log_metrics("train", metrics)
trainer.save_metrics("train", metrics)
trainer.save_state()
#############
# Evaluation
#############
tokenizer.padding_side = 'left'
metrics = trainer.evaluate()
metrics["eval_samples"] = len(processed_test_dataset)
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)
    #############
    # Save model
    #############
trainer.save_model(train_conf.output_dir)
def download_model(base_model_id):
tokenizer = AutoTokenizer.from_pretrained(base_model_id)
model = AutoModelForCausalLM.from_pretrained(base_model_id)
model.save_pretrained(base_model_id)
return tokenizer
def prepare_dataset(dataset):
dataset = load_dataset(dataset, split="train")
dataset = dataset.shuffle().select(range(12500))
# Convert dataset to OAI messages
    dataset = dataset.map(create_conversation, remove_columns=dataset.features, batched=False)
# split dataset into 10,000 training samples and 2,500 test samples
dataset = dataset.train_test_split(test_size=2500/12500)
print(dataset["train"][345]["messages"])
# save datasets to disk
dataset["train"].to_json("train_dataset.json", orient="records")
dataset["test"].to_json("test_dataset.json", orient="records")
###
def train_model(model_id):
print("111")
dataset = load_dataset("json", data_files="train_dataset.json", split="train")
bnb_config = BitsAndBytesConfig(
load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
)
print("222")
# Load model and tokenizer
model = AutoModelForCausalLM.from_pretrained(
model_id,
device_map="auto",
#attn_implementation="flash_attention_2",
torch_dtype=torch.bfloat16,
quantization_config=bnb_config
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.padding_side = 'right' # to prevent warnings
print("333")
    # set chat template to OAI chatML, remove if you start from a fine-tuned model
model, tokenizer = setup_chat_format(model, tokenizer)
peft_config = LoraConfig(
lora_alpha=128,
lora_dropout=0.05,
r=256,
bias="none",
target_modules="all-linear",
task_type="CAUSAL_LM",
)
print("444")
args = TrainingArguments(
output_dir="code-llama-7b-text-to-sql", # directory to save and repository id
num_train_epochs=3, # number of training epochs
per_device_train_batch_size=3, # batch size per device during training
gradient_accumulation_steps=2, # number of steps before performing a backward/update pass
gradient_checkpointing=True, # use gradient checkpointing to save memory
optim="adamw_torch_fused", # use fused adamw optimizer
logging_steps=10, # log every 10 steps
save_strategy="epoch", # save checkpoint every epoch
learning_rate=2e-4, # learning rate, based on QLoRA paper
bf16=True, # use bfloat16 precision
tf32=True, # use tf32 precision
max_grad_norm=0.3, # max gradient norm based on QLoRA paper
warmup_ratio=0.03, # warmup ratio based on QLoRA paper
lr_scheduler_type="constant", # use constant learning rate scheduler
push_to_hub=True, # push model to hub
report_to="tensorboard", # report metrics to tensorboard
)
max_seq_length = 3072 # max sequence length for model and packing of the dataset
print("555")
trainer = SFTTrainer(
model=model,
args=args,
train_dataset=dataset,
peft_config=peft_config,
max_seq_length=max_seq_length,
tokenizer=tokenizer,
packing=True,
dataset_kwargs={
"add_special_tokens": False, # We template with special tokens
"append_concat_token": False, # No need to add additional separator token
}
)
print("666")
# start training, the model will be automatically saved to the hub and the output directory
trainer.train()
print("777")
# save model
trainer.save_model()
del model
del trainer
torch.cuda.empty_cache()
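# Upload the locally saved model folder to a new repo under the configured HF profile;
# requires an HF_TOKEN secret with write access.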
def upload_model(base_model_id, tokenizer):
fine_tuned_model_id = replace_hf_profile(base_model_id)
login(token=os.environ["HF_TOKEN"])
api = HfApi()
#api.delete_repo(repo_id=fine_tuned_model_id, repo_type="model")
api.create_repo(repo_id=fine_tuned_model_id)
api.upload_folder(
folder_path=base_model_id,
repo_id=fine_tuned_model_id
)
tokenizer.push_to_hub(fine_tuned_model_id)
return fine_tuned_model_id
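# Swap the original org for the configured HF profile, e.g.
# "microsoft/Phi-3-mini-4k-instruct" -> "bstraehle/Phi-3-mini-4k-instruct".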
def replace_hf_profile(base_model_id):
model_id = base_model_id[base_model_id.rfind('/')+1:]
return f"{hf_profile}/{model_id}"
def process(action, base_model_id, dataset, system_prompt, user_prompt, schema):
#raise gr.Error("Please clone and bring your own credentials.")
if action == action_1:
result = fine_tune_model(base_model_id, dataset)
elif action == action_2:
fine_tuned_model_id = replace_hf_profile(base_model_id)
result = prompt_model(fine_tuned_model_id, system_prompt, user_prompt, schema)
return result
demo = gr.Interface(fn=process,
inputs=[gr.Radio([action_1, action_2], label = "Action", value = action_1),
gr.Textbox(label = "Base Model ID", value = base_model_id, lines = 1),
gr.Textbox(label = "Dataset", value = dataset, lines = 1),
gr.Textbox(label = "System Prompt", value = system_prompt, lines = 2),
gr.Textbox(label = "User Prompt", value = user_prompt, lines = 2),
gr.Textbox(label = "Schema", value = schema, lines = 2)],
                    outputs=[gr.Textbox(label = "Completion", value = os.environ.get("OUTPUT", ""))])
demo.launch()