# https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/sample_finetune.py
import logging
import os
import sys

import datasets
import gradio as gr
import torch
import transformers
from datasets import load_dataset
from huggingface_hub import HfApi, login
from peft import LoraConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments, pipeline
from trl import SFTTrainer, setup_chat_format
#from peft import AutoPeftModelForCausalLM  # only needed by the commented-out PEFT-adapter path below
#from random import randint                 # only needed by the commented-out evaluation sample below

# Fine-tune on NVIDIA 4x L4 (Space sleeps after 10 hours)

hf_profile = "bstraehle"

action_1 = "Fine-tune pre-trained model"
action_2 = "Prompt fine-tuned model"

system_prompt = "You are a text to SQL query translator. Given a question in English, generate a SQL query based on the provided SCHEMA. Do not generate any additional text. SCHEMA: {schema}"
user_prompt = "What is the total trade value and average price for each trader and stock in the trade_history table?"
schema = "CREATE TABLE trade_history (id INT, trader_id INT, stock VARCHAR(255), price DECIMAL(5,2), quantity INT, trade_time TIMESTAMP);"

base_model_id = "microsoft/Phi-3-mini-4k-instruct"
dataset = "b-mc2/sql-create-context"

def prompt_model(model_id, system_prompt, user_prompt, schema):
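    # Build a text-generation pipeline for the given model and generate a completion
    # from the chat-formatted system/user prompt.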
    pipe = pipeline("text-generation", 
                    model=model_id, 
                    model_kwargs={"torch_dtype": torch.bfloat16}, 
                    device_map="auto",
                    max_new_tokens=1000)
    messages = [
        {"role": "system", "content": system_prompt.format(schema=schema)},
        {"role": "user", "content": user_prompt}
    ]
    output = pipe(messages)
    result = output[0]["generated_text"][-1]["content"]
    print(result)
    return result
    
#    peft_model_id = "./code-llama-7b-text-to-sql"
#    # peft_model_id = args.output_dir
     
#    # Load Model with PEFT adapter
#    model = AutoPeftModelForCausalLM.from_pretrained(
#      peft_model_id,
#      device_map="auto",
#      torch_dtype=torch.float16
#    )
#    tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
#    # load into pipeline
#    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

    ###

#    eval_dataset = load_dataset("json", data_files="test_dataset.json", split="train")
#    rand_idx = randint(0, len(eval_dataset))
     
#    # Test on sample
#    prompt = pipe.tokenizer.apply_chat_template(eval_dataset[rand_idx]["messages"][:2], tokenize=False, add_generation_prompt=True)
#    outputs = pipe(prompt, max_new_tokens=256, do_sample=False, temperature=0.1, top_k=50, top_p=0.1, eos_token_id=pipe.tokenizer.eos_token_id, pad_token_id=pipe.tokenizer.pad_token_id)
     
#    print(f"Query:\n{eval_dataset[rand_idx]['messages'][1]['content']}")
#    print(f"Original Answer:\n{eval_dataset[rand_idx]['messages'][2]['content']}")
#    print(f"Generated Answer:\n{outputs[0]['generated_text'][len(prompt):].strip()}")

def fine_tune_model(base_model_id, dataset):
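    # Entry point for the fine-tuning action; currently only runs test(),
    # the remaining steps are commented out.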
    test(base_model_id, dataset)
    ##tokenizer = download_model(base_model_id)
    #prepare_dataset(dataset)
    #train_model(base_model_id)
    ##fine_tuned_model_id = upload_model(base_model_id, tokenizer)
    return "fine_tuned_model_id"

def create_conversation(sample):
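  # Convert a sql-create-context sample into OpenAI-style chat messages (system/user/assistant).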
  return {
    "messages": [
      {"role": "system", "content": system_prompt.format(schema=sample["context"])},
      {"role": "user", "content": sample["question"]},
      {"role": "assistant", "content": sample["answer"]}
    ]
  }

# Formatting function for ShareGPT-style prompts (currently unused; references a module-level `tokenizer`)
def formatting_prompts_func(examples):
    convos = examples["conversations"]
    texts = []
    mapper = {"system": "system\n", "human": "\nuser\n", "gpt": "\nassistant\n"}
    end_mapper = {"system": "", "human": "", "gpt": ""}
    for convo in convos:
        text = "".join(f"{mapper[(turn := x['from'])]} {x['value']}\n{end_mapper[turn]}" for x in convo)
        texts.append(f"{text}{tokenizer.eos_token}")
    return {"text": texts}
    
def test(base_model_id, dataset):
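    # Full LoRA fine-tuning run on HuggingFaceH4/ultrachat_200k,
    # adapted from the Phi-3 sample_finetune.py script linked at the top of this file.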
    ###################
    # Hyper-parameters
    ###################
    print("111")
    training_config = {
        "bf16": True,
        "do_eval": False,
        "learning_rate": 5.0e-06,
        "log_level": "info",
        "logging_steps": 20,
        "logging_strategy": "steps",
        "lr_scheduler_type": "cosine",
        "num_train_epochs": 1,
        "max_steps": -1,
        "output_dir": "./checkpoint_dir",
        "overwrite_output_dir": True,
        "per_device_eval_batch_size": 4,
        "per_device_train_batch_size": 4,
        "remove_unused_columns": True,
        "save_steps": 100,
        "save_total_limit": 1,
        "seed": 0,
        "gradient_checkpointing": True,
        "gradient_checkpointing_kwargs":{"use_reentrant": False},
        "gradient_accumulation_steps": 1,
        "warmup_ratio": 0.2,
        }

    print("222")
    peft_config = {
        "r": 16,
        "lora_alpha": 32,
        "lora_dropout": 0.05,
        "bias": "none",
        "task_type": "CAUSAL_LM",
        "target_modules": "all-linear",
        "modules_to_save": None,
    }
    train_conf = TrainingArguments(**training_config)
    peft_conf = LoraConfig(**peft_config)
    
    
    ###############
    # Setup logging
    ###############
    print("333")
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        handlers=[logging.StreamHandler(sys.stdout)],
    )
    log_level = train_conf.get_process_log_level()
    logger = logging.getLogger("FT")
    logger.setLevel(log_level)
    datasets.utils.logging.set_verbosity(log_level)
    transformers.utils.logging.set_verbosity(log_level)
    transformers.utils.logging.enable_default_handler()
    transformers.utils.logging.enable_explicit_format()
    
    # Log on each process a small summary
    print("444")
    logger.warning(
        f"Process rank: {train_conf.local_rank}, device: {train_conf.device}, n_gpu: {train_conf.n_gpu}"
        + f" distributed training: {bool(train_conf.local_rank != -1)}, 16-bits training: {train_conf.fp16}"
    )
    logger.info(f"Training/evaluation parameters {train_conf}")
    logger.info(f"PEFT parameters {peft_conf}")
    
    
    ################
    # Model Loading
    ################
    print("444")
    checkpoint_path = "microsoft/Phi-3-mini-4k-instruct"
    # checkpoint_path = "microsoft/Phi-3-mini-128k-instruct"
    model_kwargs = dict(
        use_cache=False,
        trust_remote_code=True,
        #attn_implementation="flash_attention_2",  # loading the model with flash-attenstion support
        torch_dtype=torch.bfloat16,
        device_map=None
    )
    print("555")
    model = AutoModelForCausalLM.from_pretrained(checkpoint_path, **model_kwargs)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint_path)
    tokenizer.model_max_length = 2048
    tokenizer.pad_token = tokenizer.unk_token  # use unk rather than eos token to prevent endless generation
    tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
    tokenizer.padding_side = 'right'
    
    
    ##################
    # Data Processing
    ##################
    print("666")
    def apply_chat_template(example, tokenizer):
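        # Render the chat messages into a single training string using the tokenizer's chat template.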
        messages = example["messages"]
        example["text"] = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=False)
        return example
    
    raw_dataset = load_dataset("HuggingFaceH4/ultrachat_200k")
    train_dataset = raw_dataset["train_sft"]
    test_dataset = raw_dataset["test_sft"]
    column_names = list(train_dataset.features)

    print("777")
    processed_train_dataset = train_dataset.map(
        apply_chat_template,
        fn_kwargs={"tokenizer": tokenizer},
        num_proc=10,
        remove_columns=column_names,
        desc="Applying chat template to train_sft",
    )

    print("888")
    processed_test_dataset = test_dataset.map(
        apply_chat_template,
        fn_kwargs={"tokenizer": tokenizer},
        num_proc=10,
        remove_columns=column_names,
        desc="Applying chat template to test_sft",
    )
    
    
    ###########
    # Training
    ###########
    print("999")
    trainer = SFTTrainer(
        model=model,
        args=train_conf,
        peft_config=peft_conf,
        train_dataset=processed_train_dataset,
        eval_dataset=processed_test_dataset,
        max_seq_length=2048,
        dataset_text_field="text",
        tokenizer=tokenizer,
        packing=True
    )
    train_result = trainer.train()
    metrics = train_result.metrics
    trainer.log_metrics("train", metrics)
    trainer.save_metrics("train", metrics)
    trainer.save_state()
    
    
    #############
    # Evaluation
    #############
    print("aaa")
    tokenizer.padding_side = 'left'
    metrics = trainer.evaluate()
    metrics["eval_samples"] = len(processed_test_dataset)
    trainer.log_metrics("eval", metrics)
    trainer.save_metrics("eval", metrics)
    
    
    # ############
    # # Save model
    # ############
    print("bbb")
    trainer.save_model(train_conf.output_dir)

def download_model(base_model_id):
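    # Download the base model and tokenizer; save the model weights to a local folder named after the model ID.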
    tokenizer = AutoTokenizer.from_pretrained(base_model_id)
    model = AutoModelForCausalLM.from_pretrained(base_model_id)
    model.save_pretrained(base_model_id)
    return tokenizer
    
def prepare_dataset(dataset):
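    # Load the text-to-SQL dataset, convert it to chat messages, and split it into train/test JSON files.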
    dataset = load_dataset(dataset, split="train")
    dataset = dataset.shuffle().select(range(12500))
     
    # Convert dataset to OAI messages
    dataset = dataset.map(create_conversation, remove_columns=list(dataset.features), batched=False)
    # split dataset into 10,000 training samples and 2,500 test samples
    dataset = dataset.train_test_split(test_size=2500/12500)
     
    print(dataset["train"][345]["messages"])
     
    # save datasets to disk
    dataset["train"].to_json("train_dataset.json", orient="records")
    dataset["test"].to_json("test_dataset.json", orient="records")
    ###

def train_model(model_id):
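    # QLoRA fine-tuning on the prepared train_dataset.json: 4-bit quantized base model plus LoRA adapters.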
    print("111")
    dataset = load_dataset("json", data_files="train_dataset.json", split="train")

    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
    )

    print("222")
    # Load model and tokenizer
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        #attn_implementation="flash_attention_2",
        torch_dtype=torch.bfloat16,
        quantization_config=bnb_config
    )
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    tokenizer.padding_side = 'right' # to prevent warnings

    print("333")
    # # set chat template to OAI chatML, remove if you start from a fine-tuned model
    model, tokenizer = setup_chat_format(model, tokenizer)

    peft_config = LoraConfig(
        lora_alpha=128,
        lora_dropout=0.05,
        r=256,
        bias="none",
        target_modules="all-linear",
        task_type="CAUSAL_LM",
    )

    print("444")
    args = TrainingArguments(
        output_dir="code-llama-7b-text-to-sql", # directory to save and repository id
        num_train_epochs=3,                     # number of training epochs
        per_device_train_batch_size=3,          # batch size per device during training
        gradient_accumulation_steps=2,          # number of steps before performing a backward/update pass
        gradient_checkpointing=True,            # use gradient checkpointing to save memory
        optim="adamw_torch_fused",              # use fused adamw optimizer
        logging_steps=10,                       # log every 10 steps
        save_strategy="epoch",                  # save checkpoint every epoch
        learning_rate=2e-4,                     # learning rate, based on QLoRA paper
        bf16=True,                              # use bfloat16 precision
        tf32=True,                              # use tf32 precision
        max_grad_norm=0.3,                      # max gradient norm based on QLoRA paper
        warmup_ratio=0.03,                      # warmup ratio based on QLoRA paper
        lr_scheduler_type="constant",           # use constant learning rate scheduler
        push_to_hub=True,                       # push model to hub
        report_to="tensorboard",                # report metrics to tensorboard
    )

    max_seq_length = 3072 # max sequence length for model and packing of the dataset

    print("555")
    trainer = SFTTrainer(
        model=model,
        args=args,
        train_dataset=dataset,
        peft_config=peft_config,
        max_seq_length=max_seq_length,
        tokenizer=tokenizer,
        packing=True,
        dataset_kwargs={
            "add_special_tokens": False,  # We template with special tokens
            "append_concat_token": False, # No need to add additional separator token
        }
    )

    print("666")
    # start training, the model will be automatically saved to the hub and the output directory
    trainer.train()
     
    print("777")
    # save model
    trainer.save_model()

    del model
    del trainer
    torch.cuda.empty_cache()
    
def upload_model(base_model_id, tokenizer):
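    # Create a repo under the configured Hugging Face profile and upload the saved model folder and tokenizer.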
    fine_tuned_model_id = replace_hf_profile(base_model_id)
    login(token=os.environ["HF_TOKEN"])
    api = HfApi()
    #api.delete_repo(repo_id=fine_tuned_model_id, repo_type="model")
    api.create_repo(repo_id=fine_tuned_model_id)
    api.upload_folder(
        folder_path=base_model_id,
        repo_id=fine_tuned_model_id
    )
    tokenizer.push_to_hub(fine_tuned_model_id)
    return fine_tuned_model_id

def replace_hf_profile(base_model_id):
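    # Replace the original model owner with the configured Hugging Face profile, keeping the model name.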
    model_id = base_model_id[base_model_id.rfind('/')+1:]
    return f"{hf_profile}/{model_id}"

def process(action, base_model_id, dataset, system_prompt, user_prompt, schema):
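    # Gradio handler: dispatch to fine-tuning or prompting based on the selected action.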
    #raise gr.Error("Please clone and bring your own credentials.")
    if action == action_1:
        result = fine_tune_model(base_model_id, dataset)
    elif action == action_2:
        fine_tuned_model_id = replace_hf_profile(base_model_id)
        result = prompt_model(fine_tuned_model_id, system_prompt, user_prompt, schema)
    else:
        raise gr.Error(f"Unknown action: {action}")
    return result

demo = gr.Interface(fn=process, 
                    inputs=[gr.Radio([action_1, action_2], label = "Action", value = action_1),
                            gr.Textbox(label = "Base Model ID", value = base_model_id, lines = 1),
                            gr.Textbox(label = "Dataset", value = dataset, lines = 1),
                            gr.Textbox(label = "System Prompt", value = system_prompt, lines = 2),
                            gr.Textbox(label = "User Prompt", value = user_prompt, lines = 2),
                            gr.Textbox(label = "Schema", value = schema, lines = 2)],
                    outputs=[gr.Textbox(label = "Completion", value = os.environ.get("OUTPUT", ""))])
demo.launch()