{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "id": "2eSvM9zX_2d3" }, "outputs": [], "source": [ "# import torch\n", "\n", "# !pip install wandb" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# import wandb\n", "# wandb.login()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# %env WANDB_WATCH=all\n", "# %env WANDB_SILENT=true" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# entity = \"wandb\"\n", "# wandb.init(project=\"llama-unsloth-test-project\", name = \"llama-unsloth-test-3\")" ] }, { "cell_type": "markdown", "metadata": { "jp-MarkdownHeadingCollapsed": true }, "source": [ "\n", "# test token limit at 3500 and changed attention dimnsion to 32" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:\n", " PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.2.2)\n", " Python 3.10.14 (you have 3.10.14)\n", " Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n", " Memory-efficient attention, SwiGLU, sparse and more won't be available.\n", " Set XFORMERS_MORE_DETAILS=1 for more details\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "==((====))== Unsloth: Fast Llama patching release 2024.4\n", " \\\\ /| GPU: NVIDIA GeForce RTX 3080. Max memory: 10.0 GB. Platform = Linux.\n", "O^O/ \\_/ \\ Pytorch: 2.2.2. CUDA = 8.6. CUDA Toolkit = 11.8.\n", "\\ / Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.\n", " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n" ] } GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n", "6.117 GB of memory reserved. There seems to be an error with Unsloth: https://github.com/hiyouga/LLaMA-Factory/issues/3255" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "wandb: fast-sea-14 " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "crashed due to memory issues on step 13. So I brought down the attention headers to 32. Wandb: stellar-breeze-15 " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "actually it might have been the RSLoRA with causing the memory issue. Turned it off. ==((====))== Unsloth: Fast Llama patching release 2024.4\n", " \\\\ /| GPU: NVIDIA GeForce RTX 3080. Max memory: 10.0 GB. Platform = Linux.\n", "O^O/ \\_/ \\ Pytorch: 2.2.2. CUDA = 8.6. CUDA Toolkit = 11.8.\n", "\\ / Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.\n", " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n", "6.117 GB of memory reserved. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" ] }, { "data": { "text/html": [ "wandb version 0.17.0 is available! To upgrade, please run:\n", " $ pip install wandb --upgrade" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Tracking run with wandb version 0.16.6" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Run data is saved locally in /home/martin/cmpe258proj/wandb/run-20240508_112603-fijz686r" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Syncing run wobbly-breeze-16 to Weights & Biases (docs)
==((====))== Unsloth: Fast Llama patching release 2024.4\n", " \\\\ /| GPU: NVIDIA GeForce RTX 3080. Max memory: 10.0 GB. Platform = Linux.\n", "O^O/ \\_/ \\ Pytorch: 2.2.2. CUDA = 8.6. CUDA Toolkit = 11.8.\n", "\\ / Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.\n", " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n", "5.605 GB of memory reserved. GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n", "5.527 GB of memory reserved. " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": { "jp-MarkdownHeadingCollapsed": true }, "source": [ "# try rank stabalized LoRA on 16 attention dimension" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:\n", " PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.2.2)\n", " Python 3.10.14 (you have 3.10.14)\n", " Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n", " Memory-efficient attention, SwiGLU, sparse and more won't be available.\n", " Set XFORMERS_MORE_DETAILS=1 for more details\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "==((====))== Unsloth: Fast Llama patching release 2024.4\n", " \\\\ /| GPU: NVIDIA GeForce RTX 3080. GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n", "5.605 GB of memory reserved. GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n", "5.527 GB of memory reserved. GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n", "5.527 GB of memory reserved. GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n", "5.527 GB of memory reserved. GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n", "5.527 GB of memory reserved. GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n", "5.527 GB of memory reserved. GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n", "5.527 GB of memory reserved. GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n", "5.527 GB of memory reserved. GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n", "5.527 GB of memory reserved. You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers The dataset will be iterated until the `max_steps` are reached.\n", " warnings.warn( GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n", "0.771 GB of memory reserved. Max memory = {max_memory} GB.\")\n", "print(f\"{start_gpu_memory} GB of memory reserved.\") This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565\n", "You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers\n", "Unsloth 2024.4 patched 22 layers with 22 QKV layers, 22 O layers and 22 MLP layers.\n" ] } ], "source": [ "import time\n", "from transformers import TextStreamer\n", "from unsloth import FastLanguageModel\n", "\n", "model_or_checkpoint = 'tiny_llama_newData'\n", "model, tokenizer = FastLanguageModel.from_pretrained(\n", " model_name = model_or_checkpoint, \n", " max_seq_length = 2048,\n", " dtype = None,\n", " load_in_4bit = True,\n", ")\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "alpaca_prompt = \"\"\"\n", "\n", "### Instruction:\n", "{}\n", "\n", "### Input:\n", "{}\n", "\n", "### Response:\n", "{}\"\"\"\n", "\n", "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n", "def formatting_prompts_func(examples):\n", " instructions = examples[\"instruction\"]\n", " inputs = examples[\"input\"]\n", " outputs = examples[\"output\"]\n", " texts = []\n", " for instruction, input, output in zip(instructions, inputs, outputs):\n", " # Must add EOS_TOKEN, otherwise your generation will go on forever!\n", " text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n", " texts.append(text)\n", " return { \"text\" : texts, }\n", "pass\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "ins = \"\"\"divisions: '480'\n", "key:\n", " fifths: '-3'\n", " mode: major\n", "time:\n", " beats: '4'\n", " beat-type: '4'\n", "clef:\n", " sign: G\n", " line: '2'\n", "\"\"\"" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "inp = \"\"\"- a00_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", " a01_harmony:\n", " root:\n", " root-step: E\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a02_note:\n", " pitch:\n", " step: E\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: A\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a01_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a02_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: B\n", " alter: '-1'\n", " octave: '3'\n", " duration: '960'\n", " type: half\n", " a01_harmony:\n", " root:\n", " root-step: A\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a02_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", "- a00_harmony:\n", " root:\n", " root-step: E\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a01_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", " a02_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: A\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a01_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a02_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_harmony:\n", " root:\n", " root-step: A\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a01_note:\n", " pitch:\n", " step: E\n", " alter: '-1'\n", " octave: '4'\n", " duration: '1920'\n", " tie:\n", " '@type': start\n", " type: whole\n", "- a00_note:\n", " pitch:\n", " step: E\n", " alter: '-1'\n", " octave: '4'\n", " duration: '1920'\n", " tie:\n", " '@type': stop\n", " type: whole\n", "- a00_harmony:\n", " root:\n", " root-step: F\n", " kind:\n", " '@text': m\n", " '#text': minor\n", " a01_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", " a02_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: A\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a01_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a02_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: E\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_harmony:\n", " root:\n", " root-step: B\n", " root-alter: '-1'\n", " kind:\n", " '@text': '7'\n", " '#text': dominant\n", " a01_note:\n", " pitch:\n", " step: D\n", " octave: '4'\n", " duration: '1920'\n", " tie:\n", " '@type': start\n", " type: whole\n", "\"\"\"" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " \n", "\n", "### Instruction:\n", "divisions: '480'\n", "key:\n", " fifths: '-3'\n", " mode: major\n", "time:\n", " beats: '4'\n", " beat-type: '4'\n", "clef:\n", " sign: G\n", " line: '2'\n", "\n", "\n", "### Input:\n", "- a00_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", " a01_harmony:\n", " root:\n", " root-step: E\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a02_note:\n", " pitch:\n", " step: E\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: A\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a01_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a02_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: B\n", " alter: '-1'\n", " octave: '3'\n", " duration: '960'\n", " type: half\n", " a01_harmony:\n", " root:\n", " root-step: A\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a02_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", "- a00_harmony:\n", " root:\n", " root-step: E\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a01_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", " a02_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: A\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a01_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a02_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_harmony:\n", " root:\n", " root-step: A\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a01_note:\n", " pitch:\n", " step: E\n", " alter: '-1'\n", " octave: '4'\n", " duration: '1920'\n", " tie:\n", " '@type': start\n", " type: whole\n", "- a00_note:\n", " pitch:\n", " step: E\n", " alter: '-1'\n", " octave: '4'\n", " duration: '1920'\n", " tie:\n", " '@type': stop\n", " type: whole\n", "- a00_harmony:\n", " root:\n", " root-step: F\n", " kind:\n", " '@text': m\n", " '#text': minor\n", " a01_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", " a02_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: A\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a01_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a02_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: E\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_harmony:\n", " root:\n", " root-step: B\n", " root-alter: '-1'\n", " kind:\n", " '@text': '7'\n", " '#text': dominant\n", " a01_note:\n", " pitch:\n", " step: D\n", " octave: '4'\n", " duration: '1920'\n", " tie:\n", " '@type': start\n", " type: whole\n", "\n", "\n", "### Response:\n", "- a00_note:\n", " pitch:\n", " step: D\n", " octave: '4'\n", " duration: '1920'\n", " tie:\n", " '@type': stop\n", " type: whole\n", "- a00_harmony:\n", " root:\n", " root-step: E\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a01_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", " a02_note:\n", " pitch:\n", " step: E\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: A\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a01_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a02_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: B\n", " alter: '-1'\n", " octave: '3'\n", " duration: '960'\n", " type: half\n", " a01_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", "- a00_harmony:\n", " root:\n", " root-step: A\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a01_note:\n", " rest: null\n", "Generated in 19.536226272583008 seconds.\n" ] } ], "source": [ "# alpaca_prompt = Copied from above\n", "FastLanguageModel.for_inference(model) # Enable native 2x faster inference\n", "inputs = tokenizer(\n", "[\n", " alpaca_prompt.format( ins, inp,\n", " \"\", # output - leave this blank for generation!\n", " )\n", "], return_tensors = \"pt\").to(\"cuda\")\n", "\n", "start_time = time.time()\n", "text_streamer = TextStreamer(tokenizer)\n", "_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 500)\n", "print(f\"Generated in {time.time() - start_time} seconds.\")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": { "jp-MarkdownHeadingCollapsed": true }, "source": [ "# Pipeline Toy Example" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import torch\n", "import time\n", "from transformers import (\n", " AutoModelForCausalLM,\n", " AutoTokenizer,\n", " BitsAndBytesConfig,\n", " HfArgumentParser,\n", " TrainingArguments,\n", " pipeline,\n", " logging,\n", ")" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# reference: https://colab.research.google.com/drive/134o_cXcMe_lsvl15ZE_4Y75Kstepsntu?usp=sharing\n", "def load_model(model_name):\n", " # Load tokenizer and model with QLoRA configuration\n", " compute_dtype = getattr(torch, \"float16\")\n", "\n", " bnb_config = BitsAndBytesConfig(\n", " load_in_4bit=True,\n", " bnb_4bit_quant_type=\"float16\",\n", " bnb_4bit_compute_dtype=compute_dtype,\n", " bnb_4bit_use_double_quant=False,\n", " )\n", "\n", " if compute_dtype == torch.float16:\n", " major, _ = torch.cuda.get_device_capability()\n", " if major >= 8:\n", " print(\"=\" * 80)\n", " print(\"Your GPU supports bfloat16, you can accelerate training with the argument --bf16\")\n", " print(\"=\" * 80)\n", "\n", " model = AutoModelForCausalLM.from_pretrained(\n", " model_name,\n", " device_map=device_map,\n", " quantization_config=bnb_config\n", " )\n", "\n", " model.config.use_cache = False\n", " model.config.pretraining_tp = 1\n", "\n", " # Load LoRA configuration\n", " peft_config = LoraConfig(\n", " lora_alpha=32,\n", " lora_dropout=0,\n", " r=32,\n", " bias=\"none\",\n", " task_type=\"CAUSAL_LM\",\n", " )\n", "\n", " # Load Tokenizer\n", " tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n", " tokenizer.pad_token = tokenizer.eos_token\n", " tokenizer.padding_side = \"right\"\n", "\n", " return model, tokenizer, peft_config" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# reference: https://colab.research.google.com/drive/134o_cXcMe_lsvl15ZE_4Y75Kstepsntu?usp=sharing#scrollTo=XK4lTwqFflzE\n", " \n", "def text_gen_eval_wrapper(model, tokenizer, prompt, model_id=1, show_metrics=True, temp=0.7, max_new_tokens=200):\n", "\n", " # Initialize the pipeline\n", " pipe = pipeline(task=\"text-generation\",\n", " model=model,\n", " tokenizer=tokenizer,\n", " max_new_tokens=max_new_tokens,\n", " do_sample=True,\n", " temperature=temp)\n", "\n", " # Generate text using the pipeline\n", " pipe = pipeline(task=\"text-generation\",\n", " model=model,\n", " tokenizer=tokenizer,\n", " max_new_tokens=max_new_tokens)\n", " result = pipe(prompt)\n", " generated_text = result[0]['generated_text']\n", "\n", "\n", " index = generated_text.find(\"### Response:\")\n", " if index != -1:\n", " \n", " substring_after_assistant = generated_text[index + len(\"### Response:\"):].strip()\n", " else:\n", "\n", " substring_after_assistant = generated_text.strip()\n", "\n", " return substring_after_assistant" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Unused kwargs: ['quant_method']. These kwargs are not used in .\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "================================================================================\n", "Your GPU supports bfloat16, you can accelerate training with the argument --bf16\n", "================================================================================\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/martin/miniconda3/envs/unsloth_env/lib/python3.10/site-packages/transformers/quantizers/auto.py:159: UserWarning: You passed `quantization_config` or equivalent parameters to `from_pretrained` but the model you're loading already has a `quantization_config` attribute. The `quantization_config` from the model will be used.\n", " warnings.warn(warning_msg)\n" ] } ], "source": [ "from peft import LoraConfig\n", "device_map = {\"\": 0}\n", "model, tokenizer, peft_config = load_model('tiny_llama_newData')" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "alpaca_prompt = \"\"\"\n", "\n", "### Instruction:\n", "{}\n", "\n", "### Input:\n", "{}\n", "\n", "### Response:\n", "{}\"\"\"\n", "\n", "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n", "def formatting_prompts_func(examples):\n", " instructions = examples[\"instruction\"]\n", " inputs = examples[\"input\"]\n", " outputs = examples[\"output\"]\n", " texts = []\n", " for instruction, input, output in zip(instructions, inputs, outputs):\n", " # Must add EOS_TOKEN, otherwise your generation will go on forever!\n", " text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n", " texts.append(text)\n", " return { \"text\" : texts, }\n", "pass\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "ins = \"\"\"divisions: '480'\n", "key:\n", " fifths: '-3'\n", " mode: major\n", "time:\n", " beats: '4'\n", " beat-type: '4'\n", "clef:\n", " sign: G\n", " line: '2'\n", "\"\"\"" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "inp = \"\"\"- a00_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", " a01_harmony:\n", " root:\n", " root-step: E\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a02_note:\n", " pitch:\n", " step: E\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: A\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a01_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a02_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: B\n", " alter: '-1'\n", " octave: '3'\n", " duration: '960'\n", " type: half\n", " a01_harmony:\n", " root:\n", " root-step: A\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a02_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", "- a00_harmony:\n", " root:\n", " root-step: E\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a01_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", " a02_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: A\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a01_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a02_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_harmony:\n", " root:\n", " root-step: A\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a01_note:\n", " pitch:\n", " step: E\n", " alter: '-1'\n", " octave: '4'\n", " duration: '1920'\n", " tie:\n", " '@type': start\n", " type: whole\n", "- a00_note:\n", " pitch:\n", " step: E\n", " alter: '-1'\n", " octave: '4'\n", " duration: '1920'\n", " tie:\n", " '@type': stop\n", " type: whole\n", "- a00_harmony:\n", " root:\n", " root-step: F\n", " kind:\n", " '@text': m\n", " '#text': minor\n", " a01_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", " a02_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: A\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a01_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a02_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: E\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_harmony:\n", " root:\n", " root-step: B\n", " root-alter: '-1'\n", " kind:\n", " '@text': '7'\n", " '#text': dominant\n", " a01_note:\n", " pitch:\n", " step: D\n", " octave: '4'\n", " duration: '1920'\n", " tie:\n", " '@type': start\n", " type: whole\n", "\"\"\"" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\"\\n\\n### Instruction:\\ndivisions: '480'\\nkey:\\n fifths: '-3'\\n mode: major\\ntime:\\n beats: '4'\\n beat-type: '4'\\nclef:\\n sign: G\\n line: '2'\\n\\n\\n### Input:\\n- a00_note:\\n rest: null\\n duration: '960'\\n type: half\\n a01_harmony:\\n root:\\n root-step: E\\n root-alter: '-1'\\n kind:\\n '@text': ''\\n '#text': major\\n a02_note:\\n pitch:\\n step: E\\n alter: '-1'\\n octave: '4'\\n duration: '480'\\n type: quarter\\n a03_note:\\n pitch:\\n step: G\\n octave: '4'\\n duration: '480'\\n type: quarter\\n- a00_note:\\n pitch:\\n step: A\\n alter: '-1'\\n octave: '4'\\n duration: '480'\\n type: quarter\\n a01_note:\\n pitch:\\n step: G\\n octave: '4'\\n duration: '480'\\n type: quarter\\n a02_note:\\n pitch:\\n step: F\\n octave: '4'\\n duration: '480'\\n type: quarter\\n a03_note:\\n pitch:\\n step: G\\n octave: '4'\\n duration: '480'\\n type: quarter\\n- a00_note:\\n pitch:\\n step: B\\n alter: '-1'\\n octave: '3'\\n duration: '960'\\n type: half\\n a01_harmony:\\n root:\\n root-step: A\\n root-alter: '-1'\\n kind:\\n '@text': ''\\n '#text': major\\n a02_note:\\n rest: null\\n duration: '960'\\n type: half\\n- a00_harmony:\\n root:\\n root-step: E\\n root-alter: '-1'\\n kind:\\n '@text': ''\\n '#text': major\\n a01_note:\\n rest: null\\n duration: '960'\\n type: half\\n a02_note:\\n pitch:\\n step: G\\n octave: '4'\\n duration: '480'\\n type: quarter\\n a03_note:\\n pitch:\\n step: G\\n octave: '4'\\n duration: '480'\\n type: quarter\\n- a00_note:\\n pitch:\\n step: A\\n alter: '-1'\\n octave: '4'\\n duration: '480'\\n type: quarter\\n a01_note:\\n pitch:\\n step: G\\n octave: '4'\\n duration: '480'\\n type: quarter\\n a02_note:\\n pitch:\\n step: G\\n octave: '4'\\n duration: '480'\\n type: quarter\\n a03_note:\\n pitch:\\n step: F\\n octave: '4'\\n duration: '480'\\n type: quarter\\n- a00_harmony:\\n root:\\n root-step: A\\n root-alter: '-1'\\n kind:\\n '@text': ''\\n '#text': major\\n a01_note:\\n pitch:\\n step: E\\n alter: '-1'\\n octave: '4'\\n duration: '1920'\\n tie:\\n '@type': start\\n type: whole\\n- a00_note:\\n pitch:\\n step: E\\n alter: '-1'\\n octave: '4'\\n duration: '1920'\\n tie:\\n '@type': stop\\n type: whole\\n- a00_harmony:\\n root:\\n root-step: F\\n kind:\\n '@text': m\\n '#text': minor\\n a01_note:\\n rest: null\\n duration: '960'\\n type: half\\n a02_note:\\n pitch:\\n step: F\\n octave: '4'\\n duration: '480'\\n type: quarter\\n a03_note:\\n pitch:\\n step: G\\n octave: '4'\\n duration: '480'\\n type: quarter\\n- a00_note:\\n pitch:\\n step: A\\n alter: '-1'\\n octave: '4'\\n duration: '480'\\n type: quarter\\n a01_note:\\n pitch:\\n step: F\\n octave: '4'\\n duration: '480'\\n type: quarter\\n a02_note:\\n pitch:\\n step: F\\n octave: '4'\\n duration: '480'\\n type: quarter\\n a03_note:\\n pitch:\\n step: E\\n alter: '-1'\\n octave: '4'\\n duration: '480'\\n type: quarter\\n- a00_harmony:\\n root:\\n root-step: B\\n root-alter: '-1'\\n kind:\\n '@text': '7'\\n '#text': dominant\\n a01_note:\\n pitch:\\n step: D\\n octave: '4'\\n duration: '1920'\\n tie:\\n '@type': start\\n type: whole\\n\\n\\n### Response:\\n\"" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "message = alpaca_prompt.format( ins, inp,\n", " \"\", # output - leave this blank for generation!\n", " )\n", "message" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Generated in 39.8182430267334 seconds.\n", "- a00_note:\n", " pitch:\n", " step: D\n", " octave: '4'\n", " duration: '1920'\n", " tie:\n", " '@type': stop\n", " type: whole\n", "- a00_harmony:\n", " root:\n", " root-step: E\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a01_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", " a02_note:\n", " pitch:\n", " step: E\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: A\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a01_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a02_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: B\n", " alter: '-1'\n", " octave: '3'\n", " duration: '960'\n", " type: half\n", " a01_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", "- a00_harmony:\n", " root:\n", " root-step: A\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a01_note:\n", " rest: null\n" ] } ], "source": [ "start_time = time.time()\n", "generated_text = text_gen_eval_wrapper(model, tokenizer, message, show_metrics=False, temp=.1, max_new_tokens=500)\n", "print(f\"Generated in {time.time() - start_time} seconds.\")\n", "print(generated_text)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Generated in 39.7313814163208 seconds.\n", "- a00_note:\n", " pitch:\n", " step: D\n", " octave: '4'\n", " duration: '1920'\n", " tie:\n", " '@type': stop\n", " type: whole\n", "- a00_harmony:\n", " root:\n", " root-step: E\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a01_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", " a02_note:\n", " pitch:\n", " step: E\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: A\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a01_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a02_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: B\n", " alter: '-1'\n", " octave: '3'\n", " duration: '960'\n", " type: half\n", " a01_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", "- a00_harmony:\n", " root:\n", " root-step: A\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a01_note:\n", " rest: null\n" ] } ], "source": [ "start_time = time.time()\n", "generated_text = text_gen_eval_wrapper(model, tokenizer, message, show_metrics=False, max_new_tokens=500)\n", "print(f\"Generated in {time.time() - start_time} seconds.\")\n", "print(generated_text)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": 