{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "id": "2eSvM9zX_2d3"
   },
   "outputs": [],
   "source": [
    "# import torch\n",
    "\n",
    "# !pip install wandb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import wandb\n",
    "# wandb.login()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %env WANDB_WATCH=all\n",
    "# %env WANDB_SILENT=true"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# entity = \"wandb\"\n",
    "# wandb.init(project=\"llama-unsloth-test-project\", name = \"llama-unsloth-test-3\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "jp-MarkdownHeadingCollapsed": true
   },
   "source": [
    "\n",
    "# test token limit at 3500 and changed attention dimnsion to 32"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:\n",
      "    PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.2.2)\n",
      "    Python  3.10.14 (you have 3.10.14)\n",
      "  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n",
      "  Memory-efficient attention, SwiGLU, sparse and more won't be available.\n",
      "  Set XFORMERS_MORE_DETAILS=1 for more details\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "==((====))==  Unsloth: Fast Llama patching release 2024.4\n",
      "   \\\\   /|    GPU: NVIDIA GeForce RTX 3080. Max memory: 10.0 GB. Platform = Linux.\n",
      "O^O/ \\_/ \\    Pytorch: 2.2.2. CUDA = 8.6. CUDA Toolkit = 11.8.\n",
      "\\        /    Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.\n",
      " \"-____-\"     Free Apache license: http://github.com/unslothai/unsloth\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.\n"
     ]
    }
   ],
   "source": [
    "from unsloth import FastLanguageModel\n",
    "import torch\n",
    "max_seq_length = 3500 \n",
    "dtype = None\n",
    "load_in_4bit = True\n",
    "\n",
    "\n",
    "model, tokenizer = FastLanguageModel.from_pretrained(\n",
    "    model_name = \"unsloth/llama-3-8b-bnb-4bit\",\n",
    "    max_seq_length = max_seq_length,\n",
    "    dtype = dtype,\n",
    "    load_in_4bit = load_in_4bit,\n",
    "    # token = \n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = FastLanguageModel.get_peft_model(\n",
    "    model,\n",
    "    r = 32, \n",
    "    target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
    "                      \"gate_proj\", \"up_proj\", \"down_proj\",],\n",
    "    lora_alpha = 16,\n",
    "    lora_dropout = 0,\n",
    "    bias = \"none\",   \n",
    "    \n",
    "    use_gradient_checkpointing = \"unsloth\", \n",
    "    random_state = 3407,\n",
    "    use_rslora = False,  \n",
    "    loftq_config = None, \n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "alpaca_prompt = \"\"\"\n",
    "\n",
    "### Instruction:\n",
    "{}\n",
    "\n",
    "### Input:\n",
    "{}\n",
    "\n",
    "### Response:\n",
    "{}\"\"\"\n",
    "\n",
    "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n",
    "def formatting_prompts_func(examples):\n",
    "    instructions = examples[\"instruction\"]\n",
    "    inputs       = examples[\"input\"]\n",
    "    outputs      = examples[\"output\"]\n",
    "    texts = []\n",
    "    for instruction, input, output in zip(instructions, inputs, outputs):\n",
    "        # Must add EOS_TOKEN, otherwise your generation will go on forever!\n",
    "        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n",
    "        texts.append(text)\n",
    "    return { \"text\" : texts, }\n",
    "pass\n",
    "\n",
    "from datasets import load_dataset\n",
    "dataset = load_dataset(\"Chord-Llama/chord_llama_dataset\", split = \"train\")\n",
    "dataset = dataset.map(formatting_prompts_func, batched = True,)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from trl import SFTTrainer\n",
    "from transformers import TrainingArguments\n",
    "\n",
    "trainer = SFTTrainer(\n",
    "    model = model,\n",
    "    tokenizer = tokenizer,\n",
    "    train_dataset = dataset,\n",
    "    dataset_text_field = \"text\",\n",
    "    max_seq_length = max_seq_length,\n",
    "    dataset_num_proc = 2,\n",
    "    packing = False, # Can make training 5x faster for short sequences.\n",
    "    args = TrainingArguments(\n",
    "        per_device_train_batch_size = 2,\n",
    "        gradient_accumulation_steps = 4,\n",
    "        warmup_steps = 5,\n",
    "        max_steps = 20,\n",
    "        learning_rate = 2e-4,\n",
    "        fp16 = not torch.cuda.is_bf16_supported(),\n",
    "        bf16 = torch.cuda.is_bf16_supported(),\n",
    "        logging_steps = 1,\n",
    "        optim = \"adamw_8bit\",\n",
    "        weight_decay = 0.01,\n",
    "        lr_scheduler_type = \"linear\",\n",
    "        seed = 3407,\n",
    "        output_dir = \"test1\",\n",
    "        save_strategy = \"steps\",\n",
    "        save_steps = 60,\n",
    "        # report_to=\"wandb\",\n",
    "    ),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "gpu_stats = torch.cuda.get_device_properties(0)\n",
    "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
    "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n",
    "print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n",
    "print(f\"{start_gpu_memory} GB of memory reserved.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "trainer_stats = trainer.train()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "took 11 mins"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "___"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "jp-MarkdownHeadingCollapsed": true
   },
   "source": [
    "# test change attention dimension to 64 and test new data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Because we are dealing with long contexts and music tends to be a bit tricky in terms of similarity with query and set of keys, I turned on Rank-Stabalized LoRA (Locally Rank Adapted Linear Attention)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Also, tried to turn on  LoftQ on to help with generalization: https://openreview.net/forum?id=LzPWWPAdY4\n",
    "However, it did not work. There seems to be an error with Unsloth: https://github.com/hiyouga/LLaMA-Factory/issues/3255"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "wandb: fast-sea-14 "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "crashed due to memory issues on step 13. So I brought down the attention headers to 32. Wandb:  stellar-breeze-15 "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "actually it might have been the RSLoRA with causing the memory issue. Turned it off. Wandb: wobbly-breeze-16"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:\n",
      "    PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.2.2)\n",
      "    Python  3.10.14 (you have 3.10.14)\n",
      "  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n",
      "  Memory-efficient attention, SwiGLU, sparse and more won't be available.\n",
      "  Set XFORMERS_MORE_DETAILS=1 for more details\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "==((====))==  Unsloth: Fast Llama patching release 2024.4\n",
      "   \\\\   /|    GPU: NVIDIA GeForce RTX 3080. Max memory: 10.0 GB. Platform = Linux.\n",
      "O^O/ \\_/ \\    Pytorch: 2.2.2. CUDA = 8.6. CUDA Toolkit = 11.8.\n",
      "\\        /    Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.\n",
      " \"-____-\"     Free Apache license: http://github.com/unslothai/unsloth\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.\n",
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
     ]
    }
   ],
   "source": [
    "from unsloth import FastLanguageModel\n",
    "import torch\n",
    "max_seq_length = 3500 \n",
    "dtype = None\n",
    "load_in_4bit = True\n",
    "\n",
    "\n",
    "model, tokenizer = FastLanguageModel.from_pretrained(\n",
    "    model_name = \"unsloth/llama-3-8b-bnb-4bit\",\n",
    "    max_seq_length = max_seq_length,\n",
    "    dtype = dtype,\n",
    "    load_in_4bit = load_in_4bit,\n",
    "    # token = \n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Unsloth 2024.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.\n"
     ]
    }
   ],
   "source": [
    "model = FastLanguageModel.get_peft_model(\n",
    "    model,\n",
    "    r = 64, \n",
    "    target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
    "                      \"gate_proj\", \"up_proj\", \"down_proj\",],\n",
    "    lora_alpha = 16,\n",
    "    lora_dropout = 0, \n",
    "    bias = \"none\",   \n",
    "    \n",
    "    use_gradient_checkpointing = \"unsloth\",\n",
    "    random_state = 3407,\n",
    "    use_rslora = False,  \n",
    "    loftq_config = False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "alpaca_prompt = \"\"\"\n",
    "\n",
    "### Instruction:\n",
    "{}\n",
    "\n",
    "### Input:\n",
    "{}\n",
    "\n",
    "### Response:\n",
    "{}\"\"\"\n",
    "\n",
    "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n",
    "def formatting_prompts_func(examples):\n",
    "    instructions = examples[\"instruction\"]\n",
    "    inputs       = examples[\"input\"]\n",
    "    outputs      = examples[\"output\"]\n",
    "    texts = []\n",
    "    for instruction, input, output in zip(instructions, inputs, outputs):\n",
    "        # Must add EOS_TOKEN, otherwise your generation will go on forever!\n",
    "        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n",
    "        texts.append(text)\n",
    "    return { \"text\" : texts, }\n",
    "pass\n",
    "\n",
    "from datasets import load_dataset\n",
    "dataset = load_dataset(\"Chord-Llama/chord_llama_data_mini_train\", split = \"train\")\n",
    "dataset = dataset.map(formatting_prompts_func, batched = True,)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "max_steps is given, it will override any value given in num_train_epochs\n"
     ]
    }
   ],
   "source": [
    "from trl import SFTTrainer\n",
    "from transformers import TrainingArguments\n",
    "\n",
    "trainer = SFTTrainer(\n",
    "    model = model,\n",
    "    tokenizer = tokenizer,\n",
    "    train_dataset = dataset,\n",
    "    dataset_text_field = \"text\",\n",
    "    max_seq_length = max_seq_length,\n",
    "    dataset_num_proc = 2,\n",
    "    packing = False, # Can make training 5x faster for short sequences.\n",
    "    args = TrainingArguments(\n",
    "        per_device_train_batch_size = 2,\n",
    "        gradient_accumulation_steps = 4,\n",
    "        warmup_steps = 5,\n",
    "        max_steps = 20,\n",
    "        learning_rate = 2e-4,\n",
    "        fp16 = not torch.cuda.is_bf16_supported(),\n",
    "        bf16 = torch.cuda.is_bf16_supported(),\n",
    "        logging_steps = 1,\n",
    "        optim = \"adamw_8bit\",\n",
    "        weight_decay = 0.01,\n",
    "        lr_scheduler_type = \"linear\",\n",
    "        seed = 3407,\n",
    "        output_dir = \"tests\",\n",
    "        save_strategy = \"steps\",\n",
    "        save_steps = 20,\n",
    "        # report_to=\"wandb\",\n",
    "    ),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n",
      "6.117 GB of memory reserved.\n"
     ]
    }
   ],
   "source": [
    "gpu_stats = torch.cuda.get_device_properties(0)\n",
    "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
    "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n",
    "print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n",
    "print(f\"{start_gpu_memory} GB of memory reserved.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
      "   \\\\   /|    Num examples = 1,600 | Num Epochs = 1\n",
      "O^O/ \\_/ \\    Batch size per device = 2 | Gradient Accumulation steps = 4\n",
      "\\        /    Total batch size = 8 | Total steps = 20\n",
      " \"-____-\"     Number of trainable parameters = 167,772,160\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mmartin-alvarez\u001b[0m (\u001b[33mmartin-chivo\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "wandb version 0.17.0 is available!  To upgrade, please run:\n",
       " $ pip install wandb --upgrade"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Tracking run with wandb version 0.16.6"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Run data is saved locally in <code>/home/martin/cmpe258proj/wandb/run-20240508_112603-fijz686r</code>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Syncing run <strong><a href='https://wandb.ai/martin-chivo/huggingface/runs/fijz686r' target=\"_blank\">wobbly-breeze-16</a></strong> to <a href='https://wandb.ai/martin-chivo/huggingface' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       " View project at <a href='https://wandb.ai/martin-chivo/huggingface' target=\"_blank\">https://wandb.ai/martin-chivo/huggingface</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       " View run at <a href='https://wandb.ai/martin-chivo/huggingface/runs/fijz686r' target=\"_blank\">https://wandb.ai/martin-chivo/huggingface/runs/fijz686r</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='20' max='20' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [20/20 10:24, Epoch 0/1]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       " <tr style=\"text-align: left;\">\n",
       "      <th>Step</th>\n",
       "      <th>Training Loss</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.295900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0.304200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0.289000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0.291100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>0.256800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>0.240800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>0.196100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>0.193200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>9</td>\n",
       "      <td>0.184800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>10</td>\n",
       "      <td>0.156200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>11</td>\n",
       "      <td>0.158800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>12</td>\n",
       "      <td>0.138700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>13</td>\n",
       "      <td>0.125300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>14</td>\n",
       "      <td>0.128300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>15</td>\n",
       "      <td>0.119500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>16</td>\n",
       "      <td>0.129200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>17</td>\n",
       "      <td>0.130400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>18</td>\n",
       "      <td>0.115700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>19</td>\n",
       "      <td>0.121000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>20</td>\n",
       "      <td>0.107800</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "trainer_stats = trainer.train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "666.7279 seconds used for training.\n",
      "11.11 minutes used for training.\n",
      "Peak reserved memory = 12.84 GB.\n",
      "Peak reserved memory for training = 6.723 GB.\n",
      "Peak reserved memory % of max memory = 128.4 %.\n",
      "Peak reserved memory for training % of max memory = 67.23 %.\n"
     ]
    }
   ],
   "source": [
    "used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
    "used_memory_for_lora = round(used_memory - start_gpu_memory, 3)\n",
    "used_percentage = round(used_memory         /max_memory*100, 3)\n",
    "lora_percentage = round(used_memory_for_lora/max_memory*100, 3)\n",
    "print(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\n",
    "print(f\"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.\")\n",
    "print(f\"Peak reserved memory = {used_memory} GB.\")\n",
    "print(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\n",
    "print(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\n",
    "print(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "overall by coparing the loss from before it performed worse, however, the dataset's input and output is less similar. Took 10 mins"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "jp-MarkdownHeadingCollapsed": true
   },
   "source": [
    "# test with only 16 attention dimension and using new data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    " wandb: autumn-breeze-17"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:\n",
      "    PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.2.2)\n",
      "    Python  3.10.14 (you have 3.10.14)\n",
      "  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n",
      "  Memory-efficient attention, SwiGLU, sparse and more won't be available.\n",
      "  Set XFORMERS_MORE_DETAILS=1 for more details\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "==((====))==  Unsloth: Fast Llama patching release 2024.4\n",
      "   \\\\   /|    GPU: NVIDIA GeForce RTX 3080. Max memory: 10.0 GB. Platform = Linux.\n",
      "O^O/ \\_/ \\    Pytorch: 2.2.2. CUDA = 8.6. CUDA Toolkit = 11.8.\n",
      "\\        /    Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.\n",
      " \"-____-\"     Free Apache license: http://github.com/unslothai/unsloth\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.\n",
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
     ]
    }
   ],
   "source": [
    "from unsloth import FastLanguageModel\n",
    "import torch\n",
    "max_seq_length = 3500 \n",
    "dtype = None\n",
    "load_in_4bit = True\n",
    "\n",
    "\n",
    "model, tokenizer = FastLanguageModel.from_pretrained(\n",
    "    model_name = \"unsloth/llama-3-8b-bnb-4bit\",\n",
    "    max_seq_length = max_seq_length,\n",
    "    dtype = dtype,\n",
    "    load_in_4bit = load_in_4bit,\n",
    "    # token = \n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Unsloth 2024.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.\n"
     ]
    }
   ],
   "source": [
    "model = FastLanguageModel.get_peft_model(\n",
    "    model,\n",
    "    r = 16, \n",
    "    target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
    "                      \"gate_proj\", \"up_proj\", \"down_proj\",],\n",
    "    lora_alpha = 16,\n",
    "    lora_dropout = 0, \n",
    "    bias = \"none\",    \n",
    "    \n",
    "    use_gradient_checkpointing = \"unsloth\",\n",
    "    random_state = 3407,\n",
    "    use_rslora = False, \n",
    "    loftq_config = None, \n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "alpaca_prompt = \"\"\"\n",
    "\n",
    "### Instruction:\n",
    "{}\n",
    "\n",
    "### Input:\n",
    "{}\n",
    "\n",
    "### Response:\n",
    "{}\"\"\"\n",
    "\n",
    "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n",
    "def formatting_prompts_func(examples):\n",
    "    instructions = examples[\"instruction\"]\n",
    "    inputs       = examples[\"input\"]\n",
    "    outputs      = examples[\"output\"]\n",
    "    texts = []\n",
    "    for instruction, input, output in zip(instructions, inputs, outputs):\n",
    "        # Must add EOS_TOKEN, otherwise your generation will go on forever!\n",
    "        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n",
    "        texts.append(text)\n",
    "    return { \"text\" : texts, }\n",
    "pass\n",
    "\n",
    "from datasets import load_dataset\n",
    "dataset = load_dataset(\"Chord-Llama/chord_llama_data_mini_train\", split = \"train\")\n",
    "dataset = dataset.map(formatting_prompts_func, batched = True,)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "max_steps is given, it will override any value given in num_train_epochs\n"
     ]
    }
   ],
   "source": [
    "from trl import SFTTrainer\n",
    "from transformers import TrainingArguments\n",
    "\n",
    "trainer = SFTTrainer(\n",
    "    model = model,\n",
    "    tokenizer = tokenizer,\n",
    "    train_dataset = dataset,\n",
    "    dataset_text_field = \"text\",\n",
    "    max_seq_length = max_seq_length,\n",
    "    dataset_num_proc = 2,\n",
    "    packing = False, # Can make training 5x faster for short sequences.\n",
    "    args = TrainingArguments(\n",
    "        per_device_train_batch_size = 2,\n",
    "        gradient_accumulation_steps = 4,\n",
    "        warmup_steps = 5,\n",
    "        max_steps = 20,\n",
    "        learning_rate = 2e-4,\n",
    "        fp16 = not torch.cuda.is_bf16_supported(),\n",
    "        bf16 = torch.cuda.is_bf16_supported(),\n",
    "        logging_steps = 1,\n",
    "        optim = \"adamw_8bit\",\n",
    "        weight_decay = 0.01,\n",
    "        lr_scheduler_type = \"linear\",\n",
    "        seed = 3407,\n",
    "        output_dir = \"tests\",\n",
    "        save_strategy = \"steps\",\n",
    "        save_steps = 20,\n",
    "        # report_to=\"wandb\",\n",
    "    ),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n",
      "5.605 GB of memory reserved.\n"
     ]
    }
   ],
   "source": [
    "gpu_stats = torch.cuda.get_device_properties(0)\n",
    "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
    "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n",
    "print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n",
    "print(f\"{start_gpu_memory} GB of memory reserved.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
      "   \\\\   /|    Num examples = 1,600 | Num Epochs = 1\n",
      "O^O/ \\_/ \\    Batch size per device = 2 | Gradient Accumulation steps = 4\n",
      "\\        /    Total batch size = 8 | Total steps = 20\n",
      " \"-____-\"     Number of trainable parameters = 41,943,040\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mmartin-alvarez\u001b[0m (\u001b[33mmartin-chivo\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "wandb version 0.17.0 is available!  To upgrade, please run:\n",
       " $ pip install wandb --upgrade"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Tracking run with wandb version 0.16.6"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Run data is saved locally in <code>/home/martin/cmpe258proj/wandb/run-20240508_114127-4m452ks0</code>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Syncing run <strong><a href='https://wandb.ai/martin-chivo/huggingface/runs/4m452ks0' target=\"_blank\">autumn-breeze-17</a></strong> to <a href='https://wandb.ai/martin-chivo/huggingface' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       " View project at <a href='https://wandb.ai/martin-chivo/huggingface' target=\"_blank\">https://wandb.ai/martin-chivo/huggingface</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       " View run at <a href='https://wandb.ai/martin-chivo/huggingface/runs/4m452ks0' target=\"_blank\">https://wandb.ai/martin-chivo/huggingface/runs/4m452ks0</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='20' max='20' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [20/20 08:14, Epoch 0/1]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       " <tr style=\"text-align: left;\">\n",
       "      <th>Step</th>\n",
       "      <th>Training Loss</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.295900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0.304200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0.288800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0.290000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>0.254400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>0.238600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>0.194200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>0.191500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>9</td>\n",
       "      <td>0.183000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>10</td>\n",
       "      <td>0.154000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>11</td>\n",
       "      <td>0.157500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>12</td>\n",
       "      <td>0.136600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>13</td>\n",
       "      <td>0.124900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>14</td>\n",
       "      <td>0.127600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>15</td>\n",
       "      <td>0.118500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>16</td>\n",
       "      <td>0.128100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>17</td>\n",
       "      <td>0.129200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>18</td>\n",
       "      <td>0.114900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>19</td>\n",
       "      <td>0.119500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>20</td>\n",
       "      <td>0.106400</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "trainer_stats = trainer.train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "534.8471 seconds used for training.\n",
      "8.91 minutes used for training.\n",
      "Peak reserved memory = 11.961 GB.\n",
      "Peak reserved memory for training = 6.356 GB.\n",
      "Peak reserved memory % of max memory = 119.61 %.\n",
      "Peak reserved memory for training % of max memory = 63.56 %.\n"
     ]
    }
   ],
   "source": [
    "used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
    "used_memory_for_lora = round(used_memory - start_gpu_memory, 3)\n",
    "used_percentage = round(used_memory         /max_memory*100, 3)\n",
    "lora_percentage = round(used_memory_for_lora/max_memory*100, 3)\n",
    "print(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\n",
    "print(f\"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.\")\n",
    "print(f\"Peak reserved memory = {used_memory} GB.\")\n",
    "print(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\n",
    "print(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\n",
    "print(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "performed almost identical within less time. took 8 mins.\n",
    "\n",
    "Definitely used less memory which allowed the GPU to not be as throttled. \n",
    "\n",
    "the model did not save because I made a mistake with the step number for saves"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "jp-MarkdownHeadingCollapsed": true
   },
   "source": [
    "# test with new data and 8 attention dimension"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "source": [
    "wandb: n fine-sun-18 not right its one after"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:\n",
      "    PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.2.2)\n",
      "    Python  3.10.14 (you have 3.10.14)\n",
      "  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n",
      "  Memory-efficient attention, SwiGLU, sparse and more won't be available.\n",
      "  Set XFORMERS_MORE_DETAILS=1 for more details\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "==((====))==  Unsloth: Fast Llama patching release 2024.4\n",
      "   \\\\   /|    GPU: NVIDIA GeForce RTX 3080. Max memory: 10.0 GB. Platform = Linux.\n",
      "O^O/ \\_/ \\    Pytorch: 2.2.2. CUDA = 8.6. CUDA Toolkit = 11.8.\n",
      "\\        /    Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.\n",
      " \"-____-\"     Free Apache license: http://github.com/unslothai/unsloth\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.\n",
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
     ]
    }
   ],
   "source": [
    "from unsloth import FastLanguageModel\n",
    "import torch\n",
    "max_seq_length = 3500 \n",
    "dtype = None\n",
    "load_in_4bit = True\n",
    "\n",
    "\n",
    "model, tokenizer = FastLanguageModel.from_pretrained(\n",
    "    model_name = \"unsloth/llama-3-8b-bnb-4bit\",\n",
    "    max_seq_length = max_seq_length,\n",
    "    dtype = dtype,\n",
    "    load_in_4bit = load_in_4bit,\n",
    "    # token = \n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Unsloth 2024.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.\n"
     ]
    }
   ],
   "source": [
    "model = FastLanguageModel.get_peft_model(\n",
    "    model,\n",
    "    r = 8, \n",
    "    target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
    "                      \"gate_proj\", \"up_proj\", \"down_proj\",],\n",
    "    lora_alpha = 16,\n",
    "    lora_dropout = 0, \n",
    "    bias = \"none\",    \n",
    "    \n",
    "    use_gradient_checkpointing = \"unsloth\",\n",
    "    random_state = 3407,\n",
    "    use_rslora = False, \n",
    "    loftq_config = None, \n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "alpaca_prompt = \"\"\"\n",
    "\n",
    "### Instruction:\n",
    "{}\n",
    "\n",
    "### Input:\n",
    "{}\n",
    "\n",
    "### Response:\n",
    "{}\"\"\"\n",
    "\n",
    "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n",
    "def formatting_prompts_func(examples):\n",
    "    instructions = examples[\"instruction\"]\n",
    "    inputs       = examples[\"input\"]\n",
    "    outputs      = examples[\"output\"]\n",
    "    texts = []\n",
    "    for instruction, input, output in zip(instructions, inputs, outputs):\n",
    "        # Must add EOS_TOKEN, otherwise your generation will go on forever!\n",
    "        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n",
    "        texts.append(text)\n",
    "    return { \"text\" : texts, }\n",
    "pass\n",
    "\n",
    "from datasets import load_dataset\n",
    "dataset = load_dataset(\"Chord-Llama/chord_llama_data_mini_train\", split = \"train\")\n",
    "dataset = dataset.map(formatting_prompts_func, batched = True,)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "max_steps is given, it will override any value given in num_train_epochs\n"
     ]
    }
   ],
   "source": [
    "from trl import SFTTrainer\n",
    "from transformers import TrainingArguments\n",
    "\n",
    "trainer = SFTTrainer(\n",
    "    model = model,\n",
    "    tokenizer = tokenizer,\n",
    "    train_dataset = dataset,\n",
    "    dataset_text_field = \"text\",\n",
    "    max_seq_length = max_seq_length,\n",
    "    dataset_num_proc = 2,\n",
    "    packing = False, # Can make training 5x faster for short sequences.\n",
    "    args = TrainingArguments(\n",
    "        per_device_train_batch_size = 2,\n",
    "        gradient_accumulation_steps = 4,\n",
    "        warmup_steps = 5,\n",
    "        max_steps = 20,\n",
    "        learning_rate = 2e-4,\n",
    "        fp16 = not torch.cuda.is_bf16_supported(),\n",
    "        bf16 = torch.cuda.is_bf16_supported(),\n",
    "        logging_steps = 1,\n",
    "        optim = \"adamw_8bit\",\n",
    "        weight_decay = 0.01,\n",
    "        lr_scheduler_type = \"linear\",\n",
    "        seed = 3407,\n",
    "        output_dir = \"test1\",\n",
    "        save_strategy = \"steps\",\n",
    "        save_steps = 20,\n",
    "        # report_to=\"wandb\",\n",
    "    ),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n",
      "5.527 GB of memory reserved.\n"
     ]
    }
   ],
   "source": [
    "gpu_stats = torch.cuda.get_device_properties(0)\n",
    "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
    "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n",
    "print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n",
    "print(f\"{start_gpu_memory} GB of memory reserved.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
      "   \\\\   /|    Num examples = 1,600 | Num Epochs = 1\n",
      "O^O/ \\_/ \\    Batch size per device = 2 | Gradient Accumulation steps = 4\n",
      "\\        /    Total batch size = 8 | Total steps = 20\n",
      " \"-____-\"     Number of trainable parameters = 20,971,520\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mmartin-alvarez\u001b[0m (\u001b[33mmartin-chivo\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "wandb version 0.17.0 is available!  To upgrade, please run:\n",
       " $ pip install wandb --upgrade"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Tracking run with wandb version 0.16.6"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Run data is saved locally in <code>/home/martin/cmpe258proj/wandb/run-20240508_184601-sghnalif</code>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Syncing run <strong><a href='https://wandb.ai/martin-chivo/huggingface/runs/sghnalif' target=\"_blank\">pretty-frost-46</a></strong> to <a href='https://wandb.ai/martin-chivo/huggingface' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       " View project at <a href='https://wandb.ai/martin-chivo/huggingface' target=\"_blank\">https://wandb.ai/martin-chivo/huggingface</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       " View run at <a href='https://wandb.ai/martin-chivo/huggingface/runs/sghnalif' target=\"_blank\">https://wandb.ai/martin-chivo/huggingface/runs/sghnalif</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='20' max='20' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [20/20 08:34, Epoch 0/1]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       " <tr style=\"text-align: left;\">\n",
       "      <th>Step</th>\n",
       "      <th>Training Loss</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.295900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0.304200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0.288700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0.289100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>0.252800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>0.237200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>0.192000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>0.190100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>9</td>\n",
       "      <td>0.180800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>10</td>\n",
       "      <td>0.152200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>11</td>\n",
       "      <td>0.156800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>12</td>\n",
       "      <td>0.136000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>13</td>\n",
       "      <td>0.124100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>14</td>\n",
       "      <td>0.127000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>15</td>\n",
       "      <td>0.118200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>16</td>\n",
       "      <td>0.127600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>17</td>\n",
       "      <td>0.128900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>18</td>\n",
       "      <td>0.114800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>19</td>\n",
       "      <td>0.119600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>20</td>\n",
       "      <td>0.106400</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "trainer_stats = trainer.train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "556.5196 seconds used for training.\n",
      "9.28 minutes used for training.\n",
      "Peak reserved memory = 11.758 GB.\n",
      "Peak reserved memory for training = 6.231 GB.\n",
      "Peak reserved memory % of max memory = 117.58 %.\n",
      "Peak reserved memory for training % of max memory = 62.31 %.\n"
     ]
    }
   ],
   "source": [
    "used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
    "used_memory_for_lora = round(used_memory - start_gpu_memory, 3)\n",
    "used_percentage = round(used_memory         /max_memory*100, 3)\n",
    "lora_percentage = round(used_memory_for_lora/max_memory*100, 3)\n",
    "print(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\n",
    "print(f\"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.\")\n",
    "print(f\"Peak reserved memory = {used_memory} GB.\")\n",
    "print(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\n",
    "print(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\n",
    "print(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "less time only took 8 mins. overall performed best with 16 attention headers. the loss was better here though. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "jp-MarkdownHeadingCollapsed": true
   },
   "source": [
    "# try rank stabalized LoRA on 16 attention dimension"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:\n",
      "    PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.2.2)\n",
      "    Python  3.10.14 (you have 3.10.14)\n",
      "  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n",
      "  Memory-efficient attention, SwiGLU, sparse and more won't be available.\n",
      "  Set XFORMERS_MORE_DETAILS=1 for more details\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "==((====))==  Unsloth: Fast Llama patching release 2024.4\n",
      "   \\\\   /|    GPU: NVIDIA GeForce RTX 3080. Max memory: 10.0 GB. Platform = Linux.\n",
      "O^O/ \\_/ \\    Pytorch: 2.2.2. CUDA = 8.6. CUDA Toolkit = 11.8.\n",
      "\\        /    Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.\n",
      " \"-____-\"     Free Apache license: http://github.com/unslothai/unsloth\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.\n",
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
     ]
    }
   ],
   "source": [
    "from unsloth import FastLanguageModel\n",
    "import torch\n",
    "max_seq_length = 3500 \n",
    "dtype = None\n",
    "load_in_4bit = True\n",
    "\n",
    "\n",
    "model, tokenizer = FastLanguageModel.from_pretrained(\n",
    "    model_name = \"unsloth/llama-3-8b-bnb-4bit\",\n",
    "    max_seq_length = max_seq_length,\n",
    "    dtype = dtype,\n",
    "    load_in_4bit = load_in_4bit,\n",
    "    # token = \n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Unsloth 2024.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.\n"
     ]
    }
   ],
   "source": [
    "model = FastLanguageModel.get_peft_model(\n",
    "    model,\n",
    "    r = 16, \n",
    "    target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
    "                      \"gate_proj\", \"up_proj\", \"down_proj\",],\n",
    "    lora_alpha = 16,\n",
    "    lora_dropout = 0, \n",
    "    bias = \"none\",    \n",
    "    \n",
    "    use_gradient_checkpointing = \"unsloth\",\n",
    "    random_state = 3407,\n",
    "    use_rslora = True, \n",
    "    loftq_config = None, \n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "alpaca_prompt = \"\"\"\n",
    "\n",
    "### Instruction:\n",
    "{}\n",
    "\n",
    "### Input:\n",
    "{}\n",
    "\n",
    "### Response:\n",
    "{}\"\"\"\n",
    "\n",
    "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n",
    "def formatting_prompts_func(examples):\n",
    "    instructions = examples[\"instruction\"]\n",
    "    inputs       = examples[\"input\"]\n",
    "    outputs      = examples[\"output\"]\n",
    "    texts = []\n",
    "    for instruction, input, output in zip(instructions, inputs, outputs):\n",
    "        # Must add EOS_TOKEN, otherwise your generation will go on forever!\n",
    "        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n",
    "        texts.append(text)\n",
    "    return { \"text\" : texts, }\n",
    "pass\n",
    "\n",
    "from datasets import load_dataset\n",
    "dataset = load_dataset(\"Chord-Llama/chord_llama_data_mini_train\", split = \"train\")\n",
    "dataset = dataset.map(formatting_prompts_func, batched = True,)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "max_steps is given, it will override any value given in num_train_epochs\n"
     ]
    }
   ],
   "source": [
    "from trl import SFTTrainer\n",
    "from transformers import TrainingArguments\n",
    "\n",
    "trainer = SFTTrainer(\n",
    "    model = model,\n",
    "    tokenizer = tokenizer,\n",
    "    train_dataset = dataset,\n",
    "    dataset_text_field = \"text\",\n",
    "    max_seq_length = max_seq_length,\n",
    "    dataset_num_proc = 2,\n",
    "    packing = False, # Can make training 5x faster for short sequences.\n",
    "    args = TrainingArguments(\n",
    "        per_device_train_batch_size = 2,\n",
    "        gradient_accumulation_steps = 4,\n",
    "        warmup_steps = 5,\n",
    "        max_steps = 20,\n",
    "        learning_rate = 2e-4,\n",
    "        fp16 = not torch.cuda.is_bf16_supported(),\n",
    "        bf16 = torch.cuda.is_bf16_supported(),\n",
    "        logging_steps = 1,\n",
    "        optim = \"adamw_8bit\",\n",
    "        weight_decay = 0.01,\n",
    "        lr_scheduler_type = \"linear\",\n",
    "        seed = 3407,\n",
    "        output_dir = \"trank\",\n",
    "        save_strategy = \"steps\",\n",
    "        save_steps = 20,\n",
    "        # report_to=\"wandb\",\n",
    "    ),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n",
      "5.605 GB of memory reserved.\n"
     ]
    }
   ],
   "source": [
    "gpu_stats = torch.cuda.get_device_properties(0)\n",
    "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
    "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n",
    "print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n",
    "print(f\"{start_gpu_memory} GB of memory reserved.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
      "   \\\\   /|    Num examples = 93,742 | Num Epochs = 1\n",
      "O^O/ \\_/ \\    Batch size per device = 2 | Gradient Accumulation steps = 4\n",
      "\\        /    Total batch size = 8 | Total steps = 20\n",
      " \"-____-\"     Number of trainable parameters = 41,943,040\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mmartin-alvarez\u001b[0m (\u001b[33mmartin-chivo\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "wandb version 0.17.0 is available!  To upgrade, please run:\n",
       " $ pip install wandb --upgrade"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Tracking run with wandb version 0.16.6"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Run data is saved locally in <code>/home/martin/cmpe258proj/wandb/run-20240508_131246-4uu0wyna</code>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Syncing run <strong><a href='https://wandb.ai/martin-chivo/huggingface/runs/4uu0wyna' target=\"_blank\">dandy-vortex-21</a></strong> to <a href='https://wandb.ai/martin-chivo/huggingface' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       " View project at <a href='https://wandb.ai/martin-chivo/huggingface' target=\"_blank\">https://wandb.ai/martin-chivo/huggingface</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       " View run at <a href='https://wandb.ai/martin-chivo/huggingface/runs/4uu0wyna' target=\"_blank\">https://wandb.ai/martin-chivo/huggingface/runs/4uu0wyna</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='20' max='20' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [20/20 09:01, Epoch 0/1]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       " <tr style=\"text-align: left;\">\n",
       "      <th>Step</th>\n",
       "      <th>Training Loss</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.253800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0.285200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0.373500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0.193400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>0.154800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>0.163400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>0.139200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>0.137600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>9</td>\n",
       "      <td>0.135000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>10</td>\n",
       "      <td>0.168600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>11</td>\n",
       "      <td>0.107500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>12</td>\n",
       "      <td>0.103400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>13</td>\n",
       "      <td>0.105700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>14</td>\n",
       "      <td>0.115200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>15</td>\n",
       "      <td>0.113900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>16</td>\n",
       "      <td>0.098000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>17</td>\n",
       "      <td>0.125100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>18</td>\n",
       "      <td>0.116600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>19</td>\n",
       "      <td>0.114100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>20</td>\n",
       "      <td>0.082700</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "trainer_stats = trainer.train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "580.8442 seconds used for training.\n",
      "9.68 minutes used for training.\n",
      "Peak reserved memory = 13.453 GB.\n",
      "Peak reserved memory for training = 7.848 GB.\n",
      "Peak reserved memory % of max memory = 134.53 %.\n",
      "Peak reserved memory for training % of max memory = 78.48 %.\n"
     ]
    }
   ],
   "source": [
    "used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
    "used_memory_for_lora = round(used_memory - start_gpu_memory, 3)\n",
    "used_percentage = round(used_memory         /max_memory*100, 3)\n",
    "lora_percentage = round(used_memory_for_lora/max_memory*100, 3)\n",
    "print(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\n",
    "print(f\"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.\")\n",
    "print(f\"Peak reserved memory = {used_memory} GB.\")\n",
    "print(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\n",
    "print(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\n",
    "print(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "it worked with 16 header nodes. and it performed better than before "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "jp-MarkdownHeadingCollapsed": true
   },
   "source": [
    "# try rank stabalized LoRA on 8 attention dimension"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:\n",
      "    PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.2.2)\n",
      "    Python  3.10.14 (you have 3.10.14)\n",
      "  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n",
      "  Memory-efficient attention, SwiGLU, sparse and more won't be available.\n",
      "  Set XFORMERS_MORE_DETAILS=1 for more details\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "==((====))==  Unsloth: Fast Llama patching release 2024.4\n",
      "   \\\\   /|    GPU: NVIDIA GeForce RTX 3080. Max memory: 10.0 GB. Platform = Linux.\n",
      "O^O/ \\_/ \\    Pytorch: 2.2.2. CUDA = 8.6. CUDA Toolkit = 11.8.\n",
      "\\        /    Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.\n",
      " \"-____-\"     Free Apache license: http://github.com/unslothai/unsloth\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.\n",
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
     ]
    }
   ],
   "source": [
    "from unsloth import FastLanguageModel\n",
    "import torch\n",
    "max_seq_length = 3500 \n",
    "dtype = None\n",
    "load_in_4bit = True\n",
    "\n",
    "\n",
    "model, tokenizer = FastLanguageModel.from_pretrained(\n",
    "    model_name = \"unsloth/llama-3-8b-bnb-4bit\",\n",
    "    max_seq_length = max_seq_length,\n",
    "    dtype = dtype,\n",
    "    load_in_4bit = load_in_4bit,\n",
    "    # token = \n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Unsloth 2024.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.\n"
     ]
    }
   ],
   "source": [
    "model = FastLanguageModel.get_peft_model(\n",
    "    model,\n",
    "    r = 8, \n",
    "    target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
    "                      \"gate_proj\", \"up_proj\", \"down_proj\",],\n",
    "    lora_alpha = 16,\n",
    "    lora_dropout = 0, \n",
    "    bias = \"none\",    \n",
    "   \n",
    "    use_gradient_checkpointing = \"unsloth\",\n",
    "    random_state = 3407,\n",
    "    use_rslora = True,\n",
    "    loftq_config = None, \n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "alpaca_prompt = \"\"\"\n",
    "\n",
    "### Instruction:\n",
    "{}\n",
    "\n",
    "### Input:\n",
    "{}\n",
    "\n",
    "### Response:\n",
    "{}\"\"\"\n",
    "\n",
    "EOS_TOKEN = tokenizer.eos_token\n",
    "def formatting_prompts_func(examples):\n",
    "    instructions = examples[\"instruction\"]\n",
    "    inputs       = examples[\"input\"]\n",
    "    outputs      = examples[\"output\"]\n",
    "    texts = []\n",
    "    for instruction, input, output in zip(instructions, inputs, outputs):\n",
    "        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n",
    "        texts.append(text)\n",
    "    return { \"text\" : texts, }\n",
    "pass\n",
    "\n",
    "from datasets import load_dataset\n",
    "dataset = load_dataset(\"Chord-Llama/chord_llama_dataset\", split = \"train\")\n",
    "dataset = dataset.map(formatting_prompts_func, batched = True,)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "max_steps is given, it will override any value given in num_train_epochs\n"
     ]
    }
   ],
   "source": [
    "from trl import SFTTrainer\n",
    "from transformers import TrainingArguments\n",
    "\n",
    "trainer = SFTTrainer(\n",
    "    model = model,\n",
    "    tokenizer = tokenizer,\n",
    "    train_dataset = dataset,\n",
    "    dataset_text_field = \"text\",\n",
    "    max_seq_length = max_seq_length,\n",
    "    dataset_num_proc = 2,\n",
    "    packing = False,\n",
    "    args = TrainingArguments(\n",
    "        per_device_train_batch_size = 2,\n",
    "        gradient_accumulation_steps = 4,\n",
    "        warmup_steps = 5,\n",
    "        max_steps = 20,\n",
    "        learning_rate = 2e-4,\n",
    "        fp16 = not torch.cuda.is_bf16_supported(),\n",
    "        bf16 = torch.cuda.is_bf16_supported(),\n",
    "        logging_steps = 1,\n",
    "        optim = \"adamw_8bit\",\n",
    "        weight_decay = 0.01,\n",
    "        lr_scheduler_type = \"linear\",\n",
    "        seed = 3407,\n",
    "        output_dir = \"trank8\",\n",
    "        save_strategy = \"steps\",\n",
    "        save_steps = 20,\n",
    "        # report_to=\"wandb\",\n",
    "    ),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n",
      "5.527 GB of memory reserved.\n"
     ]
    }
   ],
   "source": [
    "gpu_stats = torch.cuda.get_device_properties(0)\n",
    "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
    "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n",
    "print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n",
    "print(f\"{start_gpu_memory} GB of memory reserved.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
      "   \\\\   /|    Num examples = 93,742 | Num Epochs = 1\n",
      "O^O/ \\_/ \\    Batch size per device = 2 | Gradient Accumulation steps = 4\n",
      "\\        /    Total batch size = 8 | Total steps = 20\n",
      " \"-____-\"     Number of trainable parameters = 20,971,520\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mmartin-alvarez\u001b[0m (\u001b[33mmartin-chivo\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "wandb version 0.17.0 is available!  To upgrade, please run:\n",
       " $ pip install wandb --upgrade"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Tracking run with wandb version 0.16.6"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Run data is saved locally in <code>/home/martin/cmpe258proj/wandb/run-20240508_152310-xma0riok</code>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Syncing run <strong><a href='https://wandb.ai/martin-chivo/huggingface/runs/xma0riok' target=\"_blank\">apricot-universe-22</a></strong> to <a href='https://wandb.ai/martin-chivo/huggingface' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       " View project at <a href='https://wandb.ai/martin-chivo/huggingface' target=\"_blank\">https://wandb.ai/martin-chivo/huggingface</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       " View run at <a href='https://wandb.ai/martin-chivo/huggingface/runs/xma0riok' target=\"_blank\">https://wandb.ai/martin-chivo/huggingface/runs/xma0riok</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='20' max='20' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [20/20 09:47, Epoch 0/1]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       " <tr style=\"text-align: left;\">\n",
       "      <th>Step</th>\n",
       "      <th>Training Loss</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.253800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0.285200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0.381300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0.200500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>0.158900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>0.168500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>0.133300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>0.131100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>9</td>\n",
       "      <td>0.137000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>10</td>\n",
       "      <td>0.170900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>11</td>\n",
       "      <td>0.104100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>12</td>\n",
       "      <td>0.103700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>13</td>\n",
       "      <td>0.105500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>14</td>\n",
       "      <td>0.114200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>15</td>\n",
       "      <td>0.112900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>16</td>\n",
       "      <td>0.097900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>17</td>\n",
       "      <td>0.126100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>18</td>\n",
       "      <td>0.116800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>19</td>\n",
       "      <td>0.114700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>20</td>\n",
       "      <td>0.082200</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "trainer_stats = trainer.train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "633.1061 seconds used for training.\n",
      "10.55 minutes used for training.\n",
      "Peak reserved memory = 13.252 GB.\n",
      "Peak reserved memory for training = 7.725 GB.\n",
      "Peak reserved memory % of max memory = 132.52 %.\n",
      "Peak reserved memory for training % of max memory = 77.25 %.\n"
     ]
    }
   ],
   "source": [
    "used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
    "used_memory_for_lora = round(used_memory - start_gpu_memory, 3)\n",
    "used_percentage = round(used_memory         /max_memory*100, 3)\n",
    "lora_percentage = round(used_memory_for_lora/max_memory*100, 3)\n",
    "print(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\n",
    "print(f\"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.\")\n",
    "print(f\"Peak reserved memory = {used_memory} GB.\")\n",
    "print(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\n",
    "print(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\n",
    "print(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "performed similar "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "jp-MarkdownHeadingCollapsed": true
   },
   "source": [
    "# stabalized LoRA rank with attention dimension set to 8  \n",
    "with parameters tunning per_device_train_batch_size = 1, gradient_accumulation_steps = 2,"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:\n",
      "    PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.2.2)\n",
      "    Python  3.10.14 (you have 3.10.14)\n",
      "  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n",
      "  Memory-efficient attention, SwiGLU, sparse and more won't be available.\n",
      "  Set XFORMERS_MORE_DETAILS=1 for more details\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "==((====))==  Unsloth: Fast Llama patching release 2024.4\n",
      "   \\\\   /|    GPU: NVIDIA GeForce RTX 3080. Max memory: 10.0 GB. Platform = Linux.\n",
      "O^O/ \\_/ \\    Pytorch: 2.2.2. CUDA = 8.6. CUDA Toolkit = 11.8.\n",
      "\\        /    Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.\n",
      " \"-____-\"     Free Apache license: http://github.com/unslothai/unsloth\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.\n",
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
     ]
    }
   ],
   "source": [
    "from unsloth import FastLanguageModel\n",
    "import torch\n",
    "max_seq_length = 3500 \n",
    "dtype = None\n",
    "load_in_4bit = True\n",
    "\n",
    "\n",
    "model, tokenizer = FastLanguageModel.from_pretrained(\n",
    "    model_name = \"unsloth/llama-3-8b-bnb-4bit\",\n",
    "    max_seq_length = max_seq_length,\n",
    "    dtype = dtype,\n",
    "    load_in_4bit = load_in_4bit,\n",
    "    # token = \n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Unsloth 2024.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.\n"
     ]
    }
   ],
   "source": [
    "model = FastLanguageModel.get_peft_model(\n",
    "    model,\n",
    "    r = 8, \n",
    "    target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
    "                      \"gate_proj\", \"up_proj\", \"down_proj\",],\n",
    "    lora_alpha = 16,\n",
    "    lora_dropout = 0,\n",
    "    bias = \"none\",   \n",
    "    \n",
    "    use_gradient_checkpointing = \"unsloth\", \n",
    "    random_state = 3407,\n",
    "    use_rslora = True, \n",
    "    loftq_config = False, \n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "alpaca_prompt = \"\"\"\n",
    "\n",
    "### Instruction:\n",
    "{}\n",
    "\n",
    "### Input:\n",
    "{}\n",
    "\n",
    "### Response:\n",
    "{}\"\"\"\n",
    "\n",
    "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n",
    "def formatting_prompts_func(examples):\n",
    "    instructions = examples[\"instruction\"]\n",
    "    inputs       = examples[\"input\"]\n",
    "    outputs      = examples[\"output\"]\n",
    "    texts = []\n",
    "    for instruction, input, output in zip(instructions, inputs, outputs):\n",
    "        # Must add EOS_TOKEN, otherwise your generation will go on forever!\n",
    "        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n",
    "        texts.append(text)\n",
    "    return { \"text\" : texts, }\n",
    "pass\n",
    "\n",
    "from datasets import load_dataset\n",
    "dataset = load_dataset(\"Chord-Llama/chord_llama_dataset\", split = \"train\")\n",
    "dataset = dataset.map(formatting_prompts_func, batched = True,)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "max_steps is given, it will override any value given in num_train_epochs\n"
     ]
    }
   ],
   "source": [
    "from trl import SFTTrainer\n",
    "from transformers import TrainingArguments\n",
    "\n",
    "trainer = SFTTrainer(\n",
    "    model = model,\n",
    "    tokenizer = tokenizer,\n",
    "    train_dataset = dataset,\n",
    "    dataset_text_field = \"text\",\n",
    "    max_seq_length = max_seq_length,\n",
    "    dataset_num_proc = 2,\n",
    "    packing = False, # Can make training 5x faster for short sequences.\n",
    "    args = TrainingArguments(\n",
    "        per_device_train_batch_size = 1,\n",
    "        gradient_accumulation_steps = 2,\n",
    "        warmup_steps = 5,\n",
    "        max_steps = 20,\n",
    "        learning_rate = 2e-4,\n",
    "        fp16 = not torch.cuda.is_bf16_supported(),\n",
    "        bf16 = torch.cuda.is_bf16_supported(),\n",
    "        logging_steps = 1,\n",
    "        optim = \"adamw_8bit\",\n",
    "        weight_decay = 0.01,\n",
    "        lr_scheduler_type = \"linear\",\n",
    "        seed = 3407,\n",
    "        output_dir = \"trank8loft\",\n",
    "        save_strategy = \"steps\",\n",
    "        save_steps = 20,\n",
    "        # report_to=\"wandb\",\n",
    "    ),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n",
      "5.527 GB of memory reserved.\n"
     ]
    }
   ],
   "source": [
    "gpu_stats = torch.cuda.get_device_properties(0)\n",
    "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
    "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n",
    "print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n",
    "print(f\"{start_gpu_memory} GB of memory reserved.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
      "   \\\\   /|    Num examples = 93,742 | Num Epochs = 1\n",
      "O^O/ \\_/ \\    Batch size per device = 1 | Gradient Accumulation steps = 2\n",
      "\\        /    Total batch size = 2 | Total steps = 20\n",
      " \"-____-\"     Number of trainable parameters = 20,971,520\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mmartin-alvarez\u001b[0m (\u001b[33mmartin-chivo\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "wandb version 0.17.0 is available!  To upgrade, please run:\n",
       " $ pip install wandb --upgrade"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Tracking run with wandb version 0.16.6"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Run data is saved locally in <code>/home/martin/cmpe258proj/wandb/run-20240508_155226-hhbshxde</code>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Syncing run <strong><a href='https://wandb.ai/martin-chivo/huggingface/runs/hhbshxde' target=\"_blank\">curious-thunder-24</a></strong> to <a href='https://wandb.ai/martin-chivo/huggingface' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       " View project at <a href='https://wandb.ai/martin-chivo/huggingface' target=\"_blank\">https://wandb.ai/martin-chivo/huggingface</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       " View run at <a href='https://wandb.ai/martin-chivo/huggingface/runs/hhbshxde' target=\"_blank\">https://wandb.ai/martin-chivo/huggingface/runs/hhbshxde</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='20' max='20' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [20/20 01:37, Epoch 0/1]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       " <tr style=\"text-align: left;\">\n",
       "      <th>Step</th>\n",
       "      <th>Training Loss</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.302600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0.205100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0.240800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0.227700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>0.225900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>0.200300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>0.193000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>0.194500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>9</td>\n",
       "      <td>0.170300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>10</td>\n",
       "      <td>0.259800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>11</td>\n",
       "      <td>0.109600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>12</td>\n",
       "      <td>0.151900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>13</td>\n",
       "      <td>0.166100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>14</td>\n",
       "      <td>0.087300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>15</td>\n",
       "      <td>0.109800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>16</td>\n",
       "      <td>0.092200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>17</td>\n",
       "      <td>0.165800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>18</td>\n",
       "      <td>0.071100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>19</td>\n",
       "      <td>0.089200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>20</td>\n",
       "      <td>0.081400</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "trainer_stats = trainer.train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "112.476 seconds used for training.\n",
      "1.87 minutes used for training.\n",
      "Peak reserved memory = 10.824 GB.\n",
      "Peak reserved memory for training = 5.297 GB.\n",
      "Peak reserved memory % of max memory = 108.24 %.\n",
      "Peak reserved memory for training % of max memory = 52.97 %.\n"
     ]
    }
   ],
   "source": [
    "used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
    "used_memory_for_lora = round(used_memory - start_gpu_memory, 3)\n",
    "used_percentage = round(used_memory         /max_memory*100, 3)\n",
    "lora_percentage = round(used_memory_for_lora/max_memory*100, 3)\n",
    "print(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\n",
    "print(f\"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.\")\n",
    "print(f\"Peak reserved memory = {used_memory} GB.\")\n",
    "print(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\n",
    "print(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\n",
    "print(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "per_device_train_batch_size = 1,\n",
    "        gradient_accumulation_steps = 2,\n",
    "\n",
    "This was the best one yet! super quick and used the least amount of memory. \n",
    "\n",
    "wandb:  curious-thunder-24 \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "jp-MarkdownHeadingCollapsed": true
   },
   "source": [
    "# stabalized LoRA rank with attention dimension set to 8  \n",
    "with parameters tunning per_device_train_batch_size = 1, gradient_accumulation_steps = 4,"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:\n",
      "    PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.2.2)\n",
      "    Python  3.10.14 (you have 3.10.14)\n",
      "  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n",
      "  Memory-efficient attention, SwiGLU, sparse and more won't be available.\n",
      "  Set XFORMERS_MORE_DETAILS=1 for more details\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "==((====))==  Unsloth: Fast Llama patching release 2024.4\n",
      "   \\\\   /|    GPU: NVIDIA GeForce RTX 3080. Max memory: 10.0 GB. Platform = Linux.\n",
      "O^O/ \\_/ \\    Pytorch: 2.2.2. CUDA = 8.6. CUDA Toolkit = 11.8.\n",
      "\\        /    Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.\n",
      " \"-____-\"     Free Apache license: http://github.com/unslothai/unsloth\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.\n",
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
     ]
    }
   ],
   "source": [
    "from unsloth import FastLanguageModel\n",
    "import torch\n",
    "max_seq_length = 3500 \n",
    "dtype = None\n",
    "load_in_4bit = True\n",
    "\n",
    "\n",
    "model, tokenizer = FastLanguageModel.from_pretrained(\n",
    "    model_name = \"unsloth/llama-3-8b-bnb-4bit\",\n",
    "    max_seq_length = max_seq_length,\n",
    "    dtype = dtype,\n",
    "    load_in_4bit = load_in_4bit,\n",
    "    # token = \n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Unsloth 2024.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.\n"
     ]
    }
   ],
   "source": [
    "model = FastLanguageModel.get_peft_model(\n",
    "    model,\n",
    "    r = 8, \n",
    "    target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
    "                      \"gate_proj\", \"up_proj\", \"down_proj\",],\n",
    "    lora_alpha = 16,\n",
    "    lora_dropout = 0,\n",
    "    bias = \"none\",   \n",
    "   \n",
    "    use_gradient_checkpointing = \"unsloth\",\n",
    "    random_state = 3407,\n",
    "    use_rslora = True,  \n",
    "    loftq_config = False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "alpaca_prompt = \"\"\"\n",
    "\n",
    "### Instruction:\n",
    "{}\n",
    "\n",
    "### Input:\n",
    "{}\n",
    "\n",
    "### Response:\n",
    "{}\"\"\"\n",
    "\n",
    "EOS_TOKEN = tokenizer.eos_token \n",
    "def formatting_prompts_func(examples):\n",
    "    instructions = examples[\"instruction\"]\n",
    "    inputs       = examples[\"input\"]\n",
    "    outputs      = examples[\"output\"]\n",
    "    texts = []\n",
    "    for instruction, input, output in zip(instructions, inputs, outputs):\n",
    "        \n",
    "        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n",
    "        texts.append(text)\n",
    "    return { \"text\" : texts, }\n",
    "pass\n",
    "\n",
    "from datasets import load_dataset\n",
    "dataset = load_dataset(\"Chord-Llama/chord_llama_dataset\", split = \"train\")\n",
    "dataset = dataset.map(formatting_prompts_func, batched = True,)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "max_steps is given, it will override any value given in num_train_epochs\n"
     ]
    }
   ],
   "source": [
    "from trl import SFTTrainer\n",
    "from transformers import TrainingArguments\n",
    "\n",
    "trainer = SFTTrainer(\n",
    "    model = model,\n",
    "    tokenizer = tokenizer,\n",
    "    train_dataset = dataset,\n",
    "    dataset_text_field = \"text\",\n",
    "    max_seq_length = max_seq_length,\n",
    "    dataset_num_proc = 2,\n",
    "    packing = False, # Can make training 5x faster for short sequences.\n",
    "    args = TrainingArguments(\n",
    "        per_device_train_batch_size = 1,\n",
    "        gradient_accumulation_steps = 4,\n",
    "        warmup_steps = 5,\n",
    "        max_steps = 20,\n",
    "        learning_rate = 2e-4,\n",
    "        fp16 = not torch.cuda.is_bf16_supported(),\n",
    "        bf16 = torch.cuda.is_bf16_supported(),\n",
    "        logging_steps = 1,\n",
    "        optim = \"adamw_8bit\",\n",
    "        weight_decay = 0.01,\n",
    "        lr_scheduler_type = \"linear\",\n",
    "        seed = 3407,\n",
    "        output_dir = \"trank8grad4batch1\",\n",
    "        save_strategy = \"steps\",\n",
    "        save_steps = 20,\n",
    "        # report_to=\"wandb\",\n",
    "    ),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n",
      "5.527 GB of memory reserved.\n"
     ]
    }
   ],
   "source": [
    "gpu_stats = torch.cuda.get_device_properties(0)\n",
    "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
    "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n",
    "print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n",
    "print(f\"{start_gpu_memory} GB of memory reserved.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
      "   \\\\   /|    Num examples = 93,742 | Num Epochs = 1\n",
      "O^O/ \\_/ \\    Batch size per device = 1 | Gradient Accumulation steps = 4\n",
      "\\        /    Total batch size = 4 | Total steps = 20\n",
      " \"-____-\"     Number of trainable parameters = 20,971,520\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mmartin-alvarez\u001b[0m (\u001b[33mmartin-chivo\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "wandb version 0.17.0 is available!  To upgrade, please run:\n",
       " $ pip install wandb --upgrade"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Tracking run with wandb version 0.16.6"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Run data is saved locally in <code>/home/martin/cmpe258proj/wandb/run-20240508_155901-awqsiw6c</code>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Syncing run <strong><a href='https://wandb.ai/martin-chivo/huggingface/runs/awqsiw6c' target=\"_blank\">bright-sky-26</a></strong> to <a href='https://wandb.ai/martin-chivo/huggingface' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       " View project at <a href='https://wandb.ai/martin-chivo/huggingface' target=\"_blank\">https://wandb.ai/martin-chivo/huggingface</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       " View run at <a href='https://wandb.ai/martin-chivo/huggingface/runs/awqsiw6c' target=\"_blank\">https://wandb.ai/martin-chivo/huggingface/runs/awqsiw6c</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='20' max='20' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [20/20 03:24, Epoch 0/1]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       " <tr style=\"text-align: left;\">\n",
       "      <th>Step</th>\n",
       "      <th>Training Loss</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.253800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0.255500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0.259000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0.294500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>0.357700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>0.219200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>0.150400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>0.126700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>9</td>\n",
       "      <td>0.132700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>10</td>\n",
       "      <td>0.097700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>11</td>\n",
       "      <td>0.171300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>12</td>\n",
       "      <td>0.126300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>13</td>\n",
       "      <td>0.101000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>14</td>\n",
       "      <td>0.103500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>15</td>\n",
       "      <td>0.126500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>16</td>\n",
       "      <td>0.130200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>17</td>\n",
       "      <td>0.112900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>18</td>\n",
       "      <td>0.149900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>19</td>\n",
       "      <td>0.144600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>20</td>\n",
       "      <td>0.163100</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "trainer_stats = trainer.train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "224.2332 seconds used for training.\n",
      "3.74 minutes used for training.\n",
      "Peak reserved memory = 10.824 GB.\n",
      "Peak reserved memory for training = 5.297 GB.\n",
      "Peak reserved memory % of max memory = 108.24 %.\n",
      "Peak reserved memory for training % of max memory = 52.97 %.\n"
     ]
    }
   ],
   "source": [
    "used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
    "used_memory_for_lora = round(used_memory - start_gpu_memory, 3)\n",
    "used_percentage = round(used_memory         /max_memory*100, 3)\n",
    "lora_percentage = round(used_memory_for_lora/max_memory*100, 3)\n",
    "print(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\n",
    "print(f\"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.\")\n",
    "print(f\"Peak reserved memory = {used_memory} GB.\")\n",
    "print(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\n",
    "print(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\n",
    "print(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "per_device_train_batch_size = 1,\n",
    "        gradient_accumulation_steps = 4,\n",
    "\n",
    "did not work as fast or as good. \n",
    "\n",
    "wandb: bright-sky-26 \n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "jp-MarkdownHeadingCollapsed": true
   },
   "source": [
    "# normal LoRA attention dimension set to 8  \n",
    "with parameters tunning per_device_train_batch_size = 1, gradient_accumulation_steps = 4,"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:\n",
      "    PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.2.2)\n",
      "    Python  3.10.14 (you have 3.10.14)\n",
      "  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n",
      "  Memory-efficient attention, SwiGLU, sparse and more won't be available.\n",
      "  Set XFORMERS_MORE_DETAILS=1 for more details\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "==((====))==  Unsloth: Fast Llama patching release 2024.4\n",
      "   \\\\   /|    GPU: NVIDIA GeForce RTX 3080. Max memory: 10.0 GB. Platform = Linux.\n",
      "O^O/ \\_/ \\    Pytorch: 2.2.2. CUDA = 8.6. CUDA Toolkit = 11.8.\n",
      "\\        /    Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.\n",
      " \"-____-\"     Free Apache license: http://github.com/unslothai/unsloth\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.\n",
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
     ]
    }
   ],
   "source": [
    "from unsloth import FastLanguageModel\n",
    "import torch\n",
    "max_seq_length = 3500 \n",
    "dtype = None\n",
    "load_in_4bit = True\n",
    "\n",
    "\n",
    "model, tokenizer = FastLanguageModel.from_pretrained(\n",
    "    model_name = \"unsloth/llama-3-8b-bnb-4bit\",\n",
    "    max_seq_length = max_seq_length,\n",
    "    dtype = dtype,\n",
    "    load_in_4bit = load_in_4bit,\n",
    "    # token = \n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Unsloth 2024.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.\n"
     ]
    }
   ],
   "source": [
    "model = FastLanguageModel.get_peft_model(\n",
    "    model,\n",
    "    r = 8, \n",
    "    target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
    "                      \"gate_proj\", \"up_proj\", \"down_proj\",],\n",
    "    lora_alpha = 16,\n",
    "    lora_dropout = 0, \n",
    "    bias = \"none\",    \n",
    "    \n",
    "    use_gradient_checkpointing = \"unsloth\", \n",
    "    random_state = 3407,\n",
    "    use_rslora = False,  \n",
    "    loftq_config = False, \n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "alpaca_prompt = \"\"\"\n",
    "\n",
    "### Instruction:\n",
    "{}\n",
    "\n",
    "### Input:\n",
    "{}\n",
    "\n",
    "### Response:\n",
    "{}\"\"\"\n",
    "\n",
    "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n",
    "def formatting_prompts_func(examples):\n",
    "    instructions = examples[\"instruction\"]\n",
    "    inputs       = examples[\"input\"]\n",
    "    outputs      = examples[\"output\"]\n",
    "    texts = []\n",
    "    for instruction, input, output in zip(instructions, inputs, outputs):\n",
    "        # Must add EOS_TOKEN, otherwise your generation will go on forever!\n",
    "        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n",
    "        texts.append(text)\n",
    "    return { \"text\" : texts, }\n",
    "pass\n",
    "\n",
    "from datasets import load_dataset\n",
    "dataset = load_dataset(\"Chord-Llama/chord_llama_dataset\", split = \"train\")\n",
    "dataset = dataset.map(formatting_prompts_func, batched = True,)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "max_steps is given, it will override any value given in num_train_epochs\n"
     ]
    }
   ],
   "source": [
    "from trl import SFTTrainer\n",
    "from transformers import TrainingArguments\n",
    "\n",
    "trainer = SFTTrainer(\n",
    "    model = model,\n",
    "    tokenizer = tokenizer,\n",
    "    train_dataset = dataset,\n",
    "    dataset_text_field = \"text\",\n",
    "    max_seq_length = max_seq_length,\n",
    "    dataset_num_proc = 2,\n",
    "    packing = False, # Can make training 5x faster for short sequences.\n",
    "    args = TrainingArguments(\n",
    "        per_device_train_batch_size = 1,\n",
    "        gradient_accumulation_steps = 2,\n",
    "        warmup_steps = 5,\n",
    "        max_steps = 20,\n",
    "        learning_rate = 2e-4,\n",
    "        fp16 = not torch.cuda.is_bf16_supported(),\n",
    "        bf16 = torch.cuda.is_bf16_supported(),\n",
    "        logging_steps = 1,\n",
    "        optim = \"adamw_8bit\",\n",
    "        weight_decay = 0.01,\n",
    "        lr_scheduler_type = \"linear\",\n",
    "        seed = 3407,\n",
    "        output_dir = \"noRank8grad2batch1\",\n",
    "        save_strategy = \"steps\",\n",
    "        save_steps = 20,\n",
    "        # report_to=\"wandb\",\n",
    "    ),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n",
      "5.527 GB of memory reserved.\n"
     ]
    }
   ],
   "source": [
    "gpu_stats = torch.cuda.get_device_properties(0)\n",
    "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
    "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n",
    "print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n",
    "print(f\"{start_gpu_memory} GB of memory reserved.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
      "   \\\\   /|    Num examples = 93,742 | Num Epochs = 1\n",
      "O^O/ \\_/ \\    Batch size per device = 1 | Gradient Accumulation steps = 2\n",
      "\\        /    Total batch size = 2 | Total steps = 20\n",
      " \"-____-\"     Number of trainable parameters = 20,971,520\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mmartin-alvarez\u001b[0m (\u001b[33mmartin-chivo\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "wandb version 0.17.0 is available!  To upgrade, please run:\n",
       " $ pip install wandb --upgrade"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Tracking run with wandb version 0.16.6"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Run data is saved locally in <code>/home/martin/cmpe258proj/wandb/run-20240508_161059-wmujat0b</code>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Syncing run <strong><a href='https://wandb.ai/martin-chivo/huggingface/runs/wmujat0b' target=\"_blank\">northern-star-29</a></strong> to <a href='https://wandb.ai/martin-chivo/huggingface' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       " View project at <a href='https://wandb.ai/martin-chivo/huggingface' target=\"_blank\">https://wandb.ai/martin-chivo/huggingface</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       " View run at <a href='https://wandb.ai/martin-chivo/huggingface/runs/wmujat0b' target=\"_blank\">https://wandb.ai/martin-chivo/huggingface/runs/wmujat0b</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='20' max='20' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [20/20 01:43, Epoch 0/1]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       " <tr style=\"text-align: left;\">\n",
       "      <th>Step</th>\n",
       "      <th>Training Loss</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.302600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0.205100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0.246600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0.253600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>0.254900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>0.213000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>0.256600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>0.227300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>9</td>\n",
       "      <td>0.259000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>10</td>\n",
       "      <td>0.312900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>11</td>\n",
       "      <td>0.149600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>12</td>\n",
       "      <td>0.177200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>13</td>\n",
       "      <td>0.182900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>14</td>\n",
       "      <td>0.097100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>15</td>\n",
       "      <td>0.121700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>16</td>\n",
       "      <td>0.108800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>17</td>\n",
       "      <td>0.179700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>18</td>\n",
       "      <td>0.077900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>19</td>\n",
       "      <td>0.102200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>20</td>\n",
       "      <td>0.090400</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "trainer_stats = trainer.train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "113.367 seconds used for training.\n",
      "1.89 minutes used for training.\n",
      "Peak reserved memory = 10.824 GB.\n",
      "Peak reserved memory for training = 5.297 GB.\n",
      "Peak reserved memory % of max memory = 108.24 %.\n",
      "Peak reserved memory for training % of max memory = 52.97 %.\n"
     ]
    }
   ],
   "source": [
    "used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
    "used_memory_for_lora = round(used_memory - start_gpu_memory, 3)\n",
    "used_percentage = round(used_memory         /max_memory*100, 3)\n",
    "lora_percentage = round(used_memory_for_lora/max_memory*100, 3)\n",
    "print(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\n",
    "print(f\"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.\")\n",
    "print(f\"Peak reserved memory = {used_memory} GB.\")\n",
    "print(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\n",
    "print(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\n",
    "print(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "per_device_train_batch_size = 1,\n",
    "        gradient_accumulation_steps = 2,\n",
    "\n",
    "\n",
    "No rank\n",
    "\n",
    "it does not perform as well. \n",
    "\n",
    "wandb: northern-star-29 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "jp-MarkdownHeadingCollapsed": true
   },
   "source": [
    "# changed learning rate from 2e-4 to 4e-04"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:\n",
      "    PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.2.2)\n",
      "    Python  3.10.14 (you have 3.10.14)\n",
      "  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n",
      "  Memory-efficient attention, SwiGLU, sparse and more won't be available.\n",
      "  Set XFORMERS_MORE_DETAILS=1 for more details\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "==((====))==  Unsloth: Fast Llama patching release 2024.4\n",
      "   \\\\   /|    GPU: NVIDIA GeForce RTX 3080. Max memory: 10.0 GB. Platform = Linux.\n",
      "O^O/ \\_/ \\    Pytorch: 2.2.2. CUDA = 8.6. CUDA Toolkit = 11.8.\n",
      "\\        /    Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.\n",
      " \"-____-\"     Free Apache license: http://github.com/unslothai/unsloth\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.\n",
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
     ]
    }
   ],
   "source": [
    "from unsloth import FastLanguageModel\n",
    "import torch\n",
    "max_seq_length = 3500 \n",
    "dtype = None\n",
    "load_in_4bit = True\n",
    "\n",
    "\n",
    "model, tokenizer = FastLanguageModel.from_pretrained(\n",
    "    model_name = \"unsloth/llama-3-8b-bnb-4bit\",\n",
    "    max_seq_length = max_seq_length,\n",
    "    dtype = dtype,\n",
    "    load_in_4bit = load_in_4bit,\n",
    "    # token = \n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Unsloth 2024.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.\n"
     ]
    }
   ],
   "source": [
    "model = FastLanguageModel.get_peft_model(\n",
    "    model,\n",
    "    r = 8,\n",
    "    target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
    "                      \"gate_proj\", \"up_proj\", \"down_proj\",],\n",
    "    lora_alpha = 16,\n",
    "    lora_dropout = 0,\n",
    "    bias = \"none\",    \n",
    "    \n",
    "    use_gradient_checkpointing = \"unsloth\", \n",
    "    random_state = 3407,\n",
    "    use_rslora = False,  \n",
    "    loftq_config = False, \n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "alpaca_prompt = \"\"\"\n",
    "\n",
    "### Instruction:\n",
    "{}\n",
    "\n",
    "### Input:\n",
    "{}\n",
    "\n",
    "### Response:\n",
    "{}\"\"\"\n",
    "\n",
    "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n",
    "def formatting_prompts_func(examples):\n",
    "    instructions = examples[\"instruction\"]\n",
    "    inputs       = examples[\"input\"]\n",
    "    outputs      = examples[\"output\"]\n",
    "    texts = []\n",
    "    for instruction, input, output in zip(instructions, inputs, outputs):\n",
    "        # Must add EOS_TOKEN, otherwise your generation will go on forever!\n",
    "        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n",
    "        texts.append(text)\n",
    "    return { \"text\" : texts, }\n",
    "pass\n",
    "\n",
    "from datasets import load_dataset\n",
    "dataset = load_dataset(\"Chord-Llama/chord_llama_dataset\", split = \"train\")\n",
    "dataset = dataset.map(formatting_prompts_func, batched = True,)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "max_steps is given, it will override any value given in num_train_epochs\n"
     ]
    }
   ],
   "source": [
    "from trl import SFTTrainer\n",
    "from transformers import TrainingArguments\n",
    "\n",
    "trainer = SFTTrainer(\n",
    "    model = model,\n",
    "    tokenizer = tokenizer,\n",
    "    train_dataset = dataset,\n",
    "    dataset_text_field = \"text\",\n",
    "    max_seq_length = max_seq_length,\n",
    "    dataset_num_proc = 2,\n",
    "    packing = False, # Can make training 5x faster for short sequences.\n",
    "    args = TrainingArguments(\n",
    "        per_device_train_batch_size = 1,\n",
    "        gradient_accumulation_steps = 2,\n",
    "        warmup_steps = 5,\n",
    "        max_steps = 20,\n",
    "        learning_rate = 4e-4,\n",
    "        fp16 = not torch.cuda.is_bf16_supported(),\n",
    "        bf16 = torch.cuda.is_bf16_supported(),\n",
    "        logging_steps = 1,\n",
    "        optim = \"adamw_8bit\",\n",
    "        weight_decay = 0.01,\n",
    "        lr_scheduler_type = \"linear\",\n",
    "        seed = 3407,\n",
    "        output_dir = \"noRank8grad2batch1\",\n",
    "        save_strategy = \"steps\",\n",
    "        save_steps = 20,\n",
    "        # report_to=\"wandb\",\n",
    "    ),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n",
      "5.527 GB of memory reserved.\n"
     ]
    }
   ],
   "source": [
    "gpu_stats = torch.cuda.get_device_properties(0)\n",
    "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
    "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n",
    "print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n",
    "print(f\"{start_gpu_memory} GB of memory reserved.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
      "   \\\\   /|    Num examples = 93,742 | Num Epochs = 1\n",
      "O^O/ \\_/ \\    Batch size per device = 1 | Gradient Accumulation steps = 2\n",
      "\\        /    Total batch size = 2 | Total steps = 20\n",
      " \"-____-\"     Number of trainable parameters = 20,971,520\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mmartin-alvarez\u001b[0m (\u001b[33mmartin-chivo\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "wandb version 0.17.0 is available!  To upgrade, please run:\n",
       " $ pip install wandb --upgrade"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Tracking run with wandb version 0.16.6"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Run data is saved locally in <code>/home/martin/cmpe258proj/wandb/run-20240508_165404-alkoysjn</code>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Syncing run <strong><a href='https://wandb.ai/martin-chivo/huggingface/runs/alkoysjn' target=\"_blank\">jumping-donkey-31</a></strong> to <a href='https://wandb.ai/martin-chivo/huggingface' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       " View project at <a href='https://wandb.ai/martin-chivo/huggingface' target=\"_blank\">https://wandb.ai/martin-chivo/huggingface</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       " View run at <a href='https://wandb.ai/martin-chivo/huggingface/runs/alkoysjn' target=\"_blank\">https://wandb.ai/martin-chivo/huggingface/runs/alkoysjn</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='20' max='20' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [20/20 01:41, Epoch 0/1]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       " <tr style=\"text-align: left;\">\n",
       "      <th>Step</th>\n",
       "      <th>Training Loss</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.302600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0.205100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0.244000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0.235200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>0.231600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>0.201900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>0.203300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>0.197600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>9</td>\n",
       "      <td>0.170400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>10</td>\n",
       "      <td>0.258800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>11</td>\n",
       "      <td>0.121000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>12</td>\n",
       "      <td>0.151900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>13</td>\n",
       "      <td>0.172000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>14</td>\n",
       "      <td>0.088100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>15</td>\n",
       "      <td>0.114500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>16</td>\n",
       "      <td>0.092900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>17</td>\n",
       "      <td>0.171200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>18</td>\n",
       "      <td>0.072100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>19</td>\n",
       "      <td>0.094800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>20</td>\n",
       "      <td>0.083800</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "trainer_stats = trainer.train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "116.0351 seconds used for training.\n",
      "1.93 minutes used for training.\n",
      "Peak reserved memory = 10.824 GB.\n",
      "Peak reserved memory for training = 5.297 GB.\n",
      "Peak reserved memory % of max memory = 108.24 %.\n",
      "Peak reserved memory for training % of max memory = 52.97 %.\n"
     ]
    }
   ],
   "source": [
    "used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
    "used_memory_for_lora = round(used_memory - start_gpu_memory, 3)\n",
    "used_percentage = round(used_memory         /max_memory*100, 3)\n",
    "lora_percentage = round(used_memory_for_lora/max_memory*100, 3)\n",
    "print(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\n",
    "print(f\"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.\")\n",
    "print(f\"Peak reserved memory = {used_memory} GB.\")\n",
    "print(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\n",
    "print(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\n",
    "print(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "per_device_train_batch_size = 1,\n",
    "        gradient_accumulation_steps = 2,\n",
    "\n",
    "\n",
    "learning rate: 4e-04\n",
    "\n",
    "This seems to have outperformed all. \n",
    "wandb:  jumping-donkey-31"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "jp-MarkdownHeadingCollapsed": true
   },
   "source": [
    "# cosine"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:\n",
      "    PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.2.2)\n",
      "    Python  3.10.14 (you have 3.10.14)\n",
      "  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n",
      "  Memory-efficient attention, SwiGLU, sparse and more won't be available.\n",
      "  Set XFORMERS_MORE_DETAILS=1 for more details\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "==((====))==  Unsloth: Fast Llama patching release 2024.4\n",
      "   \\\\   /|    GPU: NVIDIA GeForce RTX 3080. Max memory: 10.0 GB. Platform = Linux.\n",
      "O^O/ \\_/ \\    Pytorch: 2.2.2. CUDA = 8.6. CUDA Toolkit = 11.8.\n",
      "\\        /    Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.\n",
      " \"-____-\"     Free Apache license: http://github.com/unslothai/unsloth\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.\n",
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
     ]
    }
   ],
   "source": [
    "from unsloth import FastLanguageModel\n",
    "import torch\n",
    "max_seq_length = 3500 \n",
    "dtype = None\n",
    "load_in_4bit = True\n",
    "\n",
    "\n",
    "model, tokenizer = FastLanguageModel.from_pretrained(\n",
    "    model_name = \"unsloth/llama-3-8b-bnb-4bit\",\n",
    "    max_seq_length = max_seq_length,\n",
    "    dtype = dtype,\n",
    "    load_in_4bit = load_in_4bit,\n",
    "    # token = \n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Unsloth 2024.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.\n"
     ]
    }
   ],
   "source": [
    "model = FastLanguageModel.get_peft_model(\n",
    "    model,\n",
    "    r = 8, \n",
    "    target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
    "                      \"gate_proj\", \"up_proj\", \"down_proj\",],\n",
    "    lora_alpha = 16,\n",
    "    lora_dropout = 0,\n",
    "    bias = \"none\",   \n",
    "    use_gradient_checkpointing = \"unsloth\", \n",
    "    random_state = 3407,\n",
    "    use_rslora = False,  \n",
    "    loftq_config = False, \n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "alpaca_prompt = \"\"\"\n",
    "\n",
    "### Instruction:\n",
    "{}\n",
    "\n",
    "### Input:\n",
    "{}\n",
    "\n",
    "### Response:\n",
    "{}\"\"\"\n",
    "\n",
    "EOS_TOKEN = tokenizer.eos_token\n",
    "def formatting_prompts_func(examples):\n",
    "    instructions = examples[\"instruction\"]\n",
    "    inputs       = examples[\"input\"]\n",
    "    outputs      = examples[\"output\"]\n",
    "    texts = []\n",
    "    for instruction, input, output in zip(instructions, inputs, outputs):\n",
    "        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n",
    "        texts.append(text)\n",
    "    return { \"text\" : texts, }\n",
    "pass\n",
    "\n",
    "from datasets import load_dataset\n",
    "dataset = load_dataset(\"Chord-Llama/chord_llama_dataset\", split = \"train\")\n",
    "dataset = dataset.map(formatting_prompts_func, batched = True,)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "max_steps is given, it will override any value given in num_train_epochs\n"
     ]
    }
   ],
   "source": [
    "from trl import SFTTrainer\n",
    "from transformers import TrainingArguments\n",
    "\n",
    "trainer = SFTTrainer(\n",
    "    model = model,\n",
    "    tokenizer = tokenizer,\n",
    "    train_dataset = dataset,\n",
    "    dataset_text_field = \"text\",\n",
    "    max_seq_length = max_seq_length,\n",
    "    dataset_num_proc = 2,\n",
    "    packing = False, # Can make training 5x faster for short sequences.\n",
    "    args = TrainingArguments(\n",
    "        per_device_train_batch_size = 1,\n",
    "        gradient_accumulation_steps = 2,\n",
    "        warmup_steps = 5,\n",
    "        max_steps = 20,\n",
    "        learning_rate = 4e-4,\n",
    "        fp16 = not torch.cuda.is_bf16_supported(),\n",
    "        bf16 = torch.cuda.is_bf16_supported(),\n",
    "        logging_steps = 1,\n",
    "        optim = \"adamw_8bit\",\n",
    "        weight_decay = 0.01,\n",
    "        lr_scheduler_type = \"cosine\",\n",
    "        seed = 3407,\n",
    "        output_dir = \"cosD\",\n",
    "        save_strategy = \"steps\",\n",
    "        save_steps = 20,\n",
    "        # report_to=\"wandb\",\n",
    "    ),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n",
      "5.527 GB of memory reserved.\n"
     ]
    }
   ],
   "source": [
    "gpu_stats = torch.cuda.get_device_properties(0)\n",
    "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
    "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n",
    "print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n",
    "print(f\"{start_gpu_memory} GB of memory reserved.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
      "   \\\\   /|    Num examples = 93,742 | Num Epochs = 1\n",
      "O^O/ \\_/ \\    Batch size per device = 1 | Gradient Accumulation steps = 2\n",
      "\\        /    Total batch size = 2 | Total steps = 20\n",
      " \"-____-\"     Number of trainable parameters = 20,971,520\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mmartin-alvarez\u001b[0m (\u001b[33mmartin-chivo\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "wandb version 0.17.0 is available!  To upgrade, please run:\n",
       " $ pip install wandb --upgrade"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Tracking run with wandb version 0.16.6"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Run data is saved locally in <code>/home/martin/cmpe258proj/wandb/run-20240508_170156-r7rezfkp</code>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Syncing run <strong><a href='https://wandb.ai/martin-chivo/huggingface/runs/r7rezfkp' target=\"_blank\">wise-morning-32</a></strong> to <a href='https://wandb.ai/martin-chivo/huggingface' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       " View project at <a href='https://wandb.ai/martin-chivo/huggingface' target=\"_blank\">https://wandb.ai/martin-chivo/huggingface</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       " View run at <a href='https://wandb.ai/martin-chivo/huggingface/runs/r7rezfkp' target=\"_blank\">https://wandb.ai/martin-chivo/huggingface/runs/r7rezfkp</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='20' max='20' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [20/20 01:51, Epoch 0/1]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       " <tr style=\"text-align: left;\">\n",
       "      <th>Step</th>\n",
       "      <th>Training Loss</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.302600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0.205100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0.243600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0.235100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>0.231700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>0.202500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>0.203700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>0.197600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>9</td>\n",
       "      <td>0.154000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>10</td>\n",
       "      <td>0.270100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>11</td>\n",
       "      <td>0.117900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>12</td>\n",
       "      <td>0.150000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>13</td>\n",
       "      <td>0.169500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>14</td>\n",
       "      <td>0.087300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>15</td>\n",
       "      <td>0.114300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>16</td>\n",
       "      <td>0.090100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>17</td>\n",
       "      <td>0.164900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>18</td>\n",
       "      <td>0.072000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>19</td>\n",
       "      <td>0.092600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>20</td>\n",
       "      <td>0.083200</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "trainer_stats = trainer.train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "125.0917 seconds used for training.\n",
      "2.08 minutes used for training.\n",
      "Peak reserved memory = 10.824 GB.\n",
      "Peak reserved memory for training = 5.297 GB.\n",
      "Peak reserved memory % of max memory = 108.24 %.\n",
      "Peak reserved memory for training % of max memory = 52.97 %.\n"
     ]
    }
   ],
   "source": [
    "used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
    "used_memory_for_lora = round(used_memory - start_gpu_memory, 3)\n",
    "used_percentage = round(used_memory         /max_memory*100, 3)\n",
    "lora_percentage = round(used_memory_for_lora/max_memory*100, 3)\n",
    "print(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\n",
    "print(f\"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.\")\n",
    "print(f\"Peak reserved memory = {used_memory} GB.\")\n",
    "print(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\n",
    "print(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\n",
    "print(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "per_device_train_batch_size = 1,\n",
    "        gradient_accumulation_steps = 2,\n",
    "\n",
    "\n",
    "learning rate: 4e-04 with COSINE\n",
    "\n",
    "This seems to have outperformed all. \n",
    "wandb:  wise-morning-32"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "jp-MarkdownHeadingCollapsed": true
   },
   "source": [
    "# polynomial"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:\n",
      "    PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.2.2)\n",
      "    Python  3.10.14 (you have 3.10.14)\n",
      "  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n",
      "  Memory-efficient attention, SwiGLU, sparse and more won't be available.\n",
      "  Set XFORMERS_MORE_DETAILS=1 for more details\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "==((====))==  Unsloth: Fast Llama patching release 2024.4\n",
      "   \\\\   /|    GPU: NVIDIA GeForce RTX 3080. Max memory: 10.0 GB. Platform = Linux.\n",
      "O^O/ \\_/ \\    Pytorch: 2.2.2. CUDA = 8.6. CUDA Toolkit = 11.8.\n",
      "\\        /    Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.\n",
      " \"-____-\"     Free Apache license: http://github.com/unslothai/unsloth\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.\n",
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
     ]
    }
   ],
   "source": [
    "from unsloth import FastLanguageModel\n",
    "import torch\n",
    "max_seq_length = 3500 \n",
    "dtype = None\n",
    "load_in_4bit = True\n",
    "\n",
    "\n",
    "model, tokenizer = FastLanguageModel.from_pretrained(\n",
    "    model_name = \"unsloth/llama-3-8b-bnb-4bit\",\n",
    "    max_seq_length = max_seq_length,\n",
    "    dtype = dtype,\n",
    "    load_in_4bit = load_in_4bit,\n",
    "    # token = \n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Unsloth 2024.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.\n"
     ]
    }
   ],
   "source": [
    "model = FastLanguageModel.get_peft_model(\n",
    "    model,\n",
    "    r = 8, \n",
    "    target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
    "                      \"gate_proj\", \"up_proj\", \"down_proj\",],\n",
    "    lora_alpha = 16,\n",
    "    lora_dropout = 0,\n",
    "    bias = \"none\",   \n",
    "    use_gradient_checkpointing = \"unsloth\",\n",
    "    random_state = 3407,\n",
    "    use_rslora = False,  \n",
    "    loftq_config = False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "alpaca_prompt = \"\"\"\n",
    "\n",
    "### Instruction:\n",
    "{}\n",
    "\n",
    "### Input:\n",
    "{}\n",
    "\n",
    "### Response:\n",
    "{}\"\"\"\n",
    "\n",
    "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n",
    "def formatting_prompts_func(examples):\n",
    "    instructions = examples[\"instruction\"]\n",
    "    inputs       = examples[\"input\"]\n",
    "    outputs      = examples[\"output\"]\n",
    "    texts = []\n",
    "    for instruction, input, output in zip(instructions, inputs, outputs):\n",
    "        # Must add EOS_TOKEN, otherwise your generation will go on forever!\n",
    "        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n",
    "        texts.append(text)\n",
    "    return { \"text\" : texts, }\n",
    "pass\n",
    "\n",
    "from datasets import load_dataset\n",
    "dataset = load_dataset(\"Chord-Llama/chord_llama_dataset\", split = \"train\")\n",
    "dataset = dataset.map(formatting_prompts_func, batched = True,)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "max_steps is given, it will override any value given in num_train_epochs\n"
     ]
    }
   ],
   "source": [
    "from trl import SFTTrainer\n",
    "from transformers import TrainingArguments\n",
    "\n",
    "trainer = SFTTrainer(\n",
    "    model = model,\n",
    "    tokenizer = tokenizer,\n",
    "    train_dataset = dataset,\n",
    "    dataset_text_field = \"text\",\n",
    "    max_seq_length = max_seq_length,\n",
    "    dataset_num_proc = 2,\n",
    "    packing = False, # Can make training 5x faster for short sequences.\n",
    "    args = TrainingArguments(\n",
    "        per_device_train_batch_size = 1,\n",
    "        gradient_accumulation_steps = 3,\n",
    "        warmup_steps = 5,\n",
    "        max_steps = 40,\n",
    "        learning_rate = 2e-4,\n",
    "        fp16 = not torch.cuda.is_bf16_supported(),\n",
    "        bf16 = torch.cuda.is_bf16_supported(),\n",
    "        logging_steps = 1,\n",
    "        optim = \"adamw_8bit\",\n",
    "        weight_decay = 0.01,\n",
    "        lr_scheduler_type = \"polynomial\",\n",
    "        seed = 3407,\n",
    "        output_dir = \"polyD\",\n",
    "        save_strategy = \"steps\",\n",
    "        save_steps = 60,\n",
    "        # report_to=\"wandb\",\n",
    "    ),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n",
      "5.527 GB of memory reserved.\n"
     ]
    }
   ],
   "source": [
    "gpu_stats = torch.cuda.get_device_properties(0)\n",
    "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
    "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n",
    "print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n",
    "print(f\"{start_gpu_memory} GB of memory reserved.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
      "   \\\\   /|    Num examples = 93,742 | Num Epochs = 1\n",
      "O^O/ \\_/ \\    Batch size per device = 1 | Gradient Accumulation steps = 3\n",
      "\\        /    Total batch size = 3 | Total steps = 40\n",
      " \"-____-\"     Number of trainable parameters = 20,971,520\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mmartin-alvarez\u001b[0m (\u001b[33mmartin-chivo\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "wandb version 0.17.0 is available!  To upgrade, please run:\n",
       " $ pip install wandb --upgrade"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Tracking run with wandb version 0.16.6"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Run data is saved locally in <code>/home/martin/cmpe258proj/wandb/run-20240509_122040-1n4ycexy</code>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Syncing run <strong><a href='https://wandb.ai/martin-chivo/huggingface/runs/1n4ycexy' target=\"_blank\">snowy-oath-62</a></strong> to <a href='https://wandb.ai/martin-chivo/huggingface' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       " View project at <a href='https://wandb.ai/martin-chivo/huggingface' target=\"_blank\">https://wandb.ai/martin-chivo/huggingface</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       " View run at <a href='https://wandb.ai/martin-chivo/huggingface/runs/1n4ycexy' target=\"_blank\">https://wandb.ai/martin-chivo/huggingface/runs/1n4ycexy</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='40' max='40' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [40/40 07:44, Epoch 0/1]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       " <tr style=\"text-align: left;\">\n",
       "      <th>Step</th>\n",
       "      <th>Training Loss</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.289300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0.214500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0.267100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0.253500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>0.297000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>0.368800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>0.305600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>0.228200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>9</td>\n",
       "      <td>0.169300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>10</td>\n",
       "      <td>0.121800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>11</td>\n",
       "      <td>0.159100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>12</td>\n",
       "      <td>0.107000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>13</td>\n",
       "      <td>0.094300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>14</td>\n",
       "      <td>0.179100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>15</td>\n",
       "      <td>0.137500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>16</td>\n",
       "      <td>0.118600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>17</td>\n",
       "      <td>0.079700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>18</td>\n",
       "      <td>0.129500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>19</td>\n",
       "      <td>0.127600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>20</td>\n",
       "      <td>0.097600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>21</td>\n",
       "      <td>0.129100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>22</td>\n",
       "      <td>0.139200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>23</td>\n",
       "      <td>0.084400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>24</td>\n",
       "      <td>0.169900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>25</td>\n",
       "      <td>0.151800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>26</td>\n",
       "      <td>0.158100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>27</td>\n",
       "      <td>0.124100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>28</td>\n",
       "      <td>0.136100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>29</td>\n",
       "      <td>0.106600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>30</td>\n",
       "      <td>0.084100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>31</td>\n",
       "      <td>0.083000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>32</td>\n",
       "      <td>0.084200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>33</td>\n",
       "      <td>0.117600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>34</td>\n",
       "      <td>0.092500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>35</td>\n",
       "      <td>0.107800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>36</td>\n",
       "      <td>0.065100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>37</td>\n",
       "      <td>0.128300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>38</td>\n",
       "      <td>0.119500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>39</td>\n",
       "      <td>0.145800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>40</td>\n",
       "      <td>0.078400</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "trainer_stats = trainer.train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "479.6709 seconds used for training.\n",
      "7.99 minutes used for training.\n",
      "Peak reserved memory = 10.826 GB.\n",
      "Peak reserved memory for training = 5.299 GB.\n",
      "Peak reserved memory % of max memory = 108.26 %.\n",
      "Peak reserved memory for training % of max memory = 52.99 %.\n"
     ]
    }
   ],
   "source": [
    "used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
    "used_memory_for_lora = round(used_memory - start_gpu_memory, 3)\n",
    "used_percentage = round(used_memory         /max_memory*100, 3)\n",
    "lora_percentage = round(used_memory_for_lora/max_memory*100, 3)\n",
    "print(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\n",
    "print(f\"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.\")\n",
    "print(f\"Peak reserved memory = {used_memory} GB.\")\n",
    "print(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\n",
    "print(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\n",
    "print(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "per_device_train_batch_size = 1,\n",
    "        gradient_accumulation_steps = 2,\n",
    "\n",
    "\n",
    "learning rate: 4e-04 with polynomial\n",
    "\n",
    "This seems to have outperformed all. \n",
    "wandb:   magic-thunder-34 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "jp-MarkdownHeadingCollapsed": true
   },
   "source": [
    "# Linear"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:\n",
      "    PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.2.2)\n",
      "    Python  3.10.14 (you have 3.10.14)\n",
      "  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n",
      "  Memory-efficient attention, SwiGLU, sparse and more won't be available.\n",
      "  Set XFORMERS_MORE_DETAILS=1 for more details\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "==((====))==  Unsloth: Fast Llama patching release 2024.4\n",
      "   \\\\   /|    GPU: NVIDIA GeForce RTX 3080. Max memory: 10.0 GB. Platform = Linux.\n",
      "O^O/ \\_/ \\    Pytorch: 2.2.2. CUDA = 8.6. CUDA Toolkit = 11.8.\n",
      "\\        /    Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.\n",
      " \"-____-\"     Free Apache license: http://github.com/unslothai/unsloth\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.\n",
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
     ]
    }
   ],
   "source": [
    "from unsloth import FastLanguageModel\n",
    "import torch\n",
    "max_seq_length = 3500 \n",
    "dtype = None\n",
    "load_in_4bit = True\n",
    "\n",
    "\n",
    "model, tokenizer = FastLanguageModel.from_pretrained(\n",
    "    model_name = \"unsloth/llama-3-8b-bnb-4bit\",\n",
    "    max_seq_length = max_seq_length,\n",
    "    dtype = dtype,\n",
    "    load_in_4bit = load_in_4bit,\n",
    "    # token = \n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Unsloth 2024.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.\n"
     ]
    }
   ],
   "source": [
    "model = FastLanguageModel.get_peft_model(\n",
    "    model,\n",
    "    r = 8, \n",
    "    target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
    "                      \"gate_proj\", \"up_proj\", \"down_proj\",],\n",
    "    lora_alpha = 16,\n",
    "    lora_dropout = 0, \n",
    "    bias = \"none\",   \n",
    "    use_gradient_checkpointing = \"unsloth\", \n",
    "    random_state = 3407,\n",
    "    use_rslora = False,  \n",
    "    loftq_config = False, \n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "alpaca_prompt = \"\"\"\n",
    "\n",
    "### Instruction:\n",
    "{}\n",
    "\n",
    "### Input:\n",
    "{}\n",
    "\n",
    "### Response:\n",
    "{}\"\"\"\n",
    "\n",
    "EOS_TOKEN = tokenizer.eos_token \n",
    "def formatting_prompts_func(examples):\n",
    "    instructions = examples[\"instruction\"]\n",
    "    inputs       = examples[\"input\"]\n",
    "    outputs      = examples[\"output\"]\n",
    "    texts = []\n",
    "    for instruction, input, output in zip(instructions, inputs, outputs):\n",
    "        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n",
    "        texts.append(text)\n",
    "    return { \"text\" : texts, }\n",
    "pass\n",
    "\n",
    "from datasets import load_dataset\n",
    "dataset = load_dataset(\"Chord-Llama/chord_llama_dataset\", split = \"train\")\n",
    "dataset = dataset.map(formatting_prompts_func, batched = True,)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "max_steps is given, it will override any value given in num_train_epochs\n"
     ]
    }
   ],
   "source": [
    "from trl import SFTTrainer\n",
    "from transformers import TrainingArguments\n",
    "\n",
    "trainer = SFTTrainer(\n",
    "    model = model,\n",
    "    tokenizer = tokenizer,\n",
    "    train_dataset = dataset,\n",
    "    dataset_text_field = \"text\",\n",
    "    max_seq_length = max_seq_length,\n",
    "    dataset_num_proc = 2,\n",
    "    packing = False, # Can make training 5x faster for short sequences.\n",
    "    args = TrainingArguments(\n",
    "        per_device_train_batch_size = 1,\n",
    "        gradient_accumulation_steps = 2,\n",
    "        warmup_steps = 5,\n",
    "        max_steps = 20,\n",
    "        learning_rate = 3e-4,\n",
    "        fp16 = not torch.cuda.is_bf16_supported(),\n",
    "        bf16 = torch.cuda.is_bf16_supported(),\n",
    "        logging_steps = 1,\n",
    "        optim = \"adamw_8bit\",\n",
    "        weight_decay = 0.001,\n",
    "        lr_scheduler_type = \"linear\",\n",
    "        seed = 3407,\n",
    "        output_dir = \"toyCos3_001\",\n",
    "        save_strategy = \"steps\",\n",
    "        save_steps = 20,\n",
    "        # report_to=\"wandb\",\n",
    "    ),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n",
      "5.527 GB of memory reserved.\n"
     ]
    }
   ],
   "source": [
    "gpu_stats = torch.cuda.get_device_properties(0)\n",
    "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
    "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n",
    "print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n",
    "print(f\"{start_gpu_memory} GB of memory reserved.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
      "   \\\\   /|    Num examples = 93,742 | Num Epochs = 1\n",
      "O^O/ \\_/ \\    Batch size per device = 1 | Gradient Accumulation steps = 2\n",
      "\\        /    Total batch size = 2 | Total steps = 20\n",
      " \"-____-\"     Number of trainable parameters = 20,971,520\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mmartin-alvarez\u001b[0m (\u001b[33mmartin-chivo\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "wandb version 0.17.0 is available!  To upgrade, please run:\n",
       " $ pip install wandb --upgrade"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Tracking run with wandb version 0.16.6"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Run data is saved locally in <code>/home/martin/cmpe258proj/wandb/run-20240508_174616-6x2u7unl</code>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Syncing run <strong><a href='https://wandb.ai/martin-chivo/huggingface/runs/6x2u7unl' target=\"_blank\">logical-bee-39</a></strong> to <a href='https://wandb.ai/martin-chivo/huggingface' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       " View project at <a href='https://wandb.ai/martin-chivo/huggingface' target=\"_blank\">https://wandb.ai/martin-chivo/huggingface</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       " View run at <a href='https://wandb.ai/martin-chivo/huggingface/runs/6x2u7unl' target=\"_blank\">https://wandb.ai/martin-chivo/huggingface/runs/6x2u7unl</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='20' max='20' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [20/20 01:38, Epoch 0/1]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       " <tr style=\"text-align: left;\">\n",
       "      <th>Step</th>\n",
       "      <th>Training Loss</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.302600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0.205000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0.245400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0.244100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>0.241600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>0.201900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>0.213300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>0.200200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>9</td>\n",
       "      <td>0.183300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>10</td>\n",
       "      <td>0.282500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>11</td>\n",
       "      <td>0.117900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>12</td>\n",
       "      <td>0.156200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>13</td>\n",
       "      <td>0.173400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>14</td>\n",
       "      <td>0.088200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>15</td>\n",
       "      <td>0.115300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>16</td>\n",
       "      <td>0.093300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>17</td>\n",
       "      <td>0.167500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>18</td>\n",
       "      <td>0.073100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>19</td>\n",
       "      <td>0.094000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>20</td>\n",
       "      <td>0.086300</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "trainer_stats = trainer.train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "109.386 seconds used for training.\n",
      "1.82 minutes used for training.\n",
      "Peak reserved memory = 10.824 GB.\n",
      "Peak reserved memory for training = 5.297 GB.\n",
      "Peak reserved memory % of max memory = 108.24 %.\n",
      "Peak reserved memory for training % of max memory = 52.97 %.\n"
     ]
    }
   ],
   "source": [
    "used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
    "used_memory_for_lora = round(used_memory - start_gpu_memory, 3)\n",
    "used_percentage = round(used_memory         /max_memory*100, 3)\n",
    "lora_percentage = round(used_memory_for_lora/max_memory*100, 3)\n",
    "print(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\n",
    "print(f\"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.\")\n",
    "print(f\"Peak reserved memory = {used_memory} GB.\")\n",
    "print(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\n",
    "print(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\n",
    "print(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "per_device_train_batch_size = 1,\n",
    "        gradient_accumulation_steps = 2,\n",
    "\n",
    "COSINE\n",
    "\n",
    "learning rate: 3e-04 with linear\n",
    "weight_decay = 0.001,\n",
    "\n",
    "This seems to have outperformed all. it was the quickest\n",
    "wandb:    logical-bee-39 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "source": [
    "# Full Model Training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:\n",
      "    PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.2.2)\n",
      "    Python  3.10.14 (you have 3.10.14)\n",
      "  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n",
      "  Memory-efficient attention, SwiGLU, sparse and more won't be available.\n",
      "  Set XFORMERS_MORE_DETAILS=1 for more details\n",
      "Unsloth: unsloth/tinyllama-bnb-4bit can only handle sequence lengths of at most 2048.\n",
      "But with kaiokendev's RoPE scaling of 1.709, it can be magically be extended to 3500!\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "==((====))==  Unsloth: Fast Llama patching release 2024.4\n",
      "   \\\\   /|    GPU: NVIDIA GeForce RTX 3080. Max memory: 10.0 GB. Platform = Linux.\n",
      "O^O/ \\_/ \\    Pytorch: 2.2.2. CUDA = 8.6. CUDA Toolkit = 11.8.\n",
      "\\        /    Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.\n",
      " \"-____-\"     Free Apache license: http://github.com/unslothai/unsloth\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Unused kwargs: ['quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.\n",
      "You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers\n"
     ]
    }
   ],
   "source": [
    "from unsloth import FastLanguageModel\n",
    "import torch\n",
    "max_seq_length = 3500 \n",
    "dtype = None \n",
    "load_in_4bit = True \n",
    "model, tokenizer = FastLanguageModel.from_pretrained(\n",
    "    # model_name = \"unsloth/llama-3-8b-bnb-4bit\",\n",
    "    model_name = \"unsloth/tinyllama-bnb-4bit\", # \"unsloth/tinyllama\" for 16bit loading\n",
    "    max_seq_length = max_seq_length,\n",
    "    dtype = dtype,\n",
    "    load_in_4bit = load_in_4bit,\n",
    "    # token = \n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Unsloth 2024.4 patched 22 layers with 22 QKV layers, 22 O layers and 22 MLP layers.\n"
     ]
    }
   ],
   "source": [
    "model = FastLanguageModel.get_peft_model(\n",
    "    model,\n",
    "    r = 8, \n",
    "    target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
    "                      \"gate_proj\", \"up_proj\", \"down_proj\",],\n",
    "    lora_alpha = 16,\n",
    "    lora_dropout = 0, \n",
    "    bias = \"none\",   \n",
    "    use_gradient_checkpointing = \"unsloth\",\n",
    "    random_state = 3407,\n",
    "    use_rslora = False,  \n",
    "    loftq_config = False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "alpaca_prompt = \"\"\"\n",
    "\n",
    "### Instruction:\n",
    "{}\n",
    "\n",
    "### Input:\n",
    "{}\n",
    "\n",
    "### Response:\n",
    "{}\"\"\"\n",
    "\n",
    "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n",
    "def formatting_prompts_func(examples):\n",
    "    instructions = examples[\"instruction\"]\n",
    "    inputs       = examples[\"input\"]\n",
    "    outputs      = examples[\"output\"]\n",
    "    texts = []\n",
    "    for instruction, input, output in zip(instructions, inputs, outputs):\n",
    "       \n",
    "        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n",
    "        texts.append(text)\n",
    "    return { \"text\" : texts, }\n",
    "pass\n",
    "\n",
    "from datasets import load_dataset\n",
    "dataset = load_dataset(\"Chord-Llama/chord_llama_data_mini_train\", split = \"train\")\n",
    "dataset = dataset.map(formatting_prompts_func, batched = True,)\n",
    "\n",
    "# test_dataset = load_dataset(\"Chord-Llama/chord_llama_data_mini_test\", split = \"test\")\n",
    "# test_dataset = test_dataset.map(formatting_prompts_func, batched = True,)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "max_steps is given, it will override any value given in num_train_epochs\n",
      "/home/martin/miniconda3/envs/unsloth_env/lib/python3.10/site-packages/trl/trainer/sft_trainer.py:342: UserWarning: You passed `packing=True` to the SFTTrainer, and you are training your model with `max_steps` strategy. The dataset will be iterated until the `max_steps` are reached.\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "from trl import SFTTrainer\n",
    "from transformers import TrainingArguments\n",
    "\n",
    "\n",
    "trainer = SFTTrainer(\n",
    "    model = model,\n",
    "    tokenizer = tokenizer,\n",
    "    train_dataset = dataset,\n",
    "    # eval_dataset = test_dataset,\n",
    "    dataset_text_field = \"text\",\n",
    "    max_seq_length = max_seq_length,\n",
    "    dataset_num_proc = 2,\n",
    "    packing = True, \n",
    "    args = TrainingArguments(\n",
    "        per_device_train_batch_size = 1,\n",
    "        gradient_accumulation_steps = 3,\n",
    "        warmup_steps = 5,\n",
    "        # warmup_ratio = 0.1,\n",
    "        max_steps = -1,\n",
    "        # max_steps = 20,\n",
    "        num_train_epochs=1,\n",
    "        learning_rate = 2e-4,\n",
    "        #check to see if bf16 is supported. if it is use it!\n",
    "        fp16 = not torch.cuda.is_bf16_supported(),\n",
    "        bf16 = torch.cuda.is_bf16_supported(),\n",
    "        logging_steps = 1,\n",
    "        optim = \"adamw_8bit\",\n",
    "        weight_decay = 0.01,\n",
    "        lr_scheduler_type = \"cosine\",\n",
    "        seed = 3407,\n",
    "        output_dir = \"tinyCheckpoints\",\n",
    "        save_strategy = \"steps\",\n",
    "        save_steps = 100,\n",
    "        # report_to=\"wandb\",\n",
    "        \n",
    "    ),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n",
      "0.771 GB of memory reserved.\n"
     ]
    }
   ],
   "source": [
    "gpu_stats = torch.cuda.get_device_properties(0)\n",
    "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
    "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n",
    "print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n",
    "print(f\"{start_gpu_memory} GB of memory reserved.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "trainer_stats = trainer.train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
    "used_memory_for_lora = round(used_memory - start_gpu_memory, 3)\n",
    "used_percentage = round(used_memory         /max_memory*100, 3)\n",
    "lora_percentage = round(used_memory_for_lora/max_memory*100, 3)\n",
    "print(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\n",
    "print(f\"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.\")\n",
    "print(f\"Peak reserved memory = {used_memory} GB.\")\n",
    "print(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\n",
    "print(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\n",
    "print(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "jp-MarkdownHeadingCollapsed": true
   },
   "source": [
    "# Save Model\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "jp-MarkdownHeadingCollapsed": true
   },
   "outputs": [],
   "source": [
    "# model.save_pretrained(\"full_new_model\") \n",
    "# tokenizer.save_pretrained(\"full_new_model\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# model.push_to_hub(\"Chord-Llama/Llama-3-chord-llama-realFullModel\", token = '') # Online saving\n",
    "# tokenizer.push_to_hub(\"Chord-Llama/Llama-3-chord-llama-realFullModel\", token = '') # Online saving\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# model.push_to_hub_merged(\"Chord-Llama/Llama-3-chord-llama-fullModel\", tokenizer, save_method = \"merged_16bit\", token =)\n",
    "# tokenizer.push_to_hub_merged(\"Chord-Llama/Llama-3-chord-llama-fullModel\", tokenizer, save_method = \"merged_16bit\", token =)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9479a173e4cd401d866f66c3b967947e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "README.md:   0%|          | 0.00/576 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f5ef1941ce844569b0db2ff773982689",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "adapter_model.safetensors:   0%|          | 0.00/25.3M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Saved model to https://huggingface.co/Chord-Llama/Llama-3-chord-llama-throwAway\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5452a8e8dfd84dff8c754beb5dd19b33",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "model.save_pretrained(\"tinyModel2\") # Local saving\n",
    "tokenizer.save_pretrained(\"tinyModel2\")\n",
    "model.push_to_hub(\"Chord-Llama/Llama-3-chord-llama-tinyModel2\", token = '') \n",
    "tokenizer.push_to_hub(\"Chord-Llama/Llama-3-chord-llama-tinyModel2\", token = '') "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "jp-MarkdownHeadingCollapsed": true
   },
   "source": [
    "# Load Model or Checkpoint and Test it"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "using cuda, much faster than pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:\n",
      "    PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.2.2)\n",
      "    Python  3.10.14 (you have 3.10.14)\n",
      "  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n",
      "  Memory-efficient attention, SwiGLU, sparse and more won't be available.\n",
      "  Set XFORMERS_MORE_DETAILS=1 for more details\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "==((====))==  Unsloth: Fast Llama patching release 2024.4\n",
      "   \\\\   /|    GPU: NVIDIA GeForce RTX 3080. Max memory: 10.0 GB. Platform = Linux.\n",
      "O^O/ \\_/ \\    Pytorch: 2.2.2. CUDA = 8.6. CUDA Toolkit = 11.8.\n",
      "\\        /    Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.\n",
      " \"-____-\"     Free Apache license: http://github.com/unslothai/unsloth\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Unused kwargs: ['quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.\n",
      "You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565\n",
      "You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers\n",
      "Unsloth 2024.4 patched 22 layers with 22 QKV layers, 22 O layers and 22 MLP layers.\n"
     ]
    }
   ],
   "source": [
    "import time\n",
    "from transformers import TextStreamer\n",
    "from unsloth import FastLanguageModel\n",
    "\n",
    "model_or_checkpoint = 'tiny_llama_newData'\n",
    "model, tokenizer = FastLanguageModel.from_pretrained(\n",
    "    model_name = model_or_checkpoint, \n",
    "    max_seq_length = 2048,\n",
    "    dtype = None,\n",
    "    load_in_4bit = True,\n",
    ")\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "alpaca_prompt = \"\"\"\n",
    "\n",
    "### Instruction:\n",
    "{}\n",
    "\n",
    "### Input:\n",
    "{}\n",
    "\n",
    "### Response:\n",
    "{}\"\"\"\n",
    "\n",
    "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n",
    "def formatting_prompts_func(examples):\n",
    "    instructions = examples[\"instruction\"]\n",
    "    inputs       = examples[\"input\"]\n",
    "    outputs      = examples[\"output\"]\n",
    "    texts = []\n",
    "    for instruction, input, output in zip(instructions, inputs, outputs):\n",
    "        # Must add EOS_TOKEN, otherwise your generation will go on forever!\n",
    "        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n",
    "        texts.append(text)\n",
    "    return { \"text\" : texts, }\n",
    "pass\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "ins = \"\"\"divisions: '480'\n",
    "key:\n",
    "  fifths: '-3'\n",
    "  mode: major\n",
    "time:\n",
    "  beats: '4'\n",
    "  beat-type: '4'\n",
    "clef:\n",
    "  sign: G\n",
    "  line: '2'\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "inp = \"\"\"- a00_note:\n",
    "    rest: null\n",
    "    duration: '960'\n",
    "    type: half\n",
    "  a01_harmony:\n",
    "    root:\n",
    "      root-step: E\n",
    "      root-alter: '-1'\n",
    "    kind:\n",
    "      '@text': ''\n",
    "      '#text': major\n",
    "  a02_note:\n",
    "    pitch:\n",
    "      step: E\n",
    "      alter: '-1'\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "  a03_note:\n",
    "    pitch:\n",
    "      step: G\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "- a00_note:\n",
    "    pitch:\n",
    "      step: A\n",
    "      alter: '-1'\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "  a01_note:\n",
    "    pitch:\n",
    "      step: G\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "  a02_note:\n",
    "    pitch:\n",
    "      step: F\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "  a03_note:\n",
    "    pitch:\n",
    "      step: G\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "- a00_note:\n",
    "    pitch:\n",
    "      step: B\n",
    "      alter: '-1'\n",
    "      octave: '3'\n",
    "    duration: '960'\n",
    "    type: half\n",
    "  a01_harmony:\n",
    "    root:\n",
    "      root-step: A\n",
    "      root-alter: '-1'\n",
    "    kind:\n",
    "      '@text': ''\n",
    "      '#text': major\n",
    "  a02_note:\n",
    "    rest: null\n",
    "    duration: '960'\n",
    "    type: half\n",
    "- a00_harmony:\n",
    "    root:\n",
    "      root-step: E\n",
    "      root-alter: '-1'\n",
    "    kind:\n",
    "      '@text': ''\n",
    "      '#text': major\n",
    "  a01_note:\n",
    "    rest: null\n",
    "    duration: '960'\n",
    "    type: half\n",
    "  a02_note:\n",
    "    pitch:\n",
    "      step: G\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "  a03_note:\n",
    "    pitch:\n",
    "      step: G\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "- a00_note:\n",
    "    pitch:\n",
    "      step: A\n",
    "      alter: '-1'\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "  a01_note:\n",
    "    pitch:\n",
    "      step: G\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "  a02_note:\n",
    "    pitch:\n",
    "      step: G\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "  a03_note:\n",
    "    pitch:\n",
    "      step: F\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "- a00_harmony:\n",
    "    root:\n",
    "      root-step: A\n",
    "      root-alter: '-1'\n",
    "    kind:\n",
    "      '@text': ''\n",
    "      '#text': major\n",
    "  a01_note:\n",
    "    pitch:\n",
    "      step: E\n",
    "      alter: '-1'\n",
    "      octave: '4'\n",
    "    duration: '1920'\n",
    "    tie:\n",
    "      '@type': start\n",
    "    type: whole\n",
    "- a00_note:\n",
    "    pitch:\n",
    "      step: E\n",
    "      alter: '-1'\n",
    "      octave: '4'\n",
    "    duration: '1920'\n",
    "    tie:\n",
    "      '@type': stop\n",
    "    type: whole\n",
    "- a00_harmony:\n",
    "    root:\n",
    "      root-step: F\n",
    "    kind:\n",
    "      '@text': m\n",
    "      '#text': minor\n",
    "  a01_note:\n",
    "    rest: null\n",
    "    duration: '960'\n",
    "    type: half\n",
    "  a02_note:\n",
    "    pitch:\n",
    "      step: F\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "  a03_note:\n",
    "    pitch:\n",
    "      step: G\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "- a00_note:\n",
    "    pitch:\n",
    "      step: A\n",
    "      alter: '-1'\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "  a01_note:\n",
    "    pitch:\n",
    "      step: F\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "  a02_note:\n",
    "    pitch:\n",
    "      step: F\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "  a03_note:\n",
    "    pitch:\n",
    "      step: E\n",
    "      alter: '-1'\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "- a00_harmony:\n",
    "    root:\n",
    "      root-step: B\n",
    "      root-alter: '-1'\n",
    "    kind:\n",
    "      '@text': '7'\n",
    "      '#text': dominant\n",
    "  a01_note:\n",
    "    pitch:\n",
    "      step: D\n",
    "      octave: '4'\n",
    "    duration: '1920'\n",
    "    tie:\n",
    "      '@type': start\n",
    "    type: whole\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<s> \n",
      "\n",
      "### Instruction:\n",
      "divisions: '480'\n",
      "key:\n",
      "  fifths: '-3'\n",
      "  mode: major\n",
      "time:\n",
      "  beats: '4'\n",
      "  beat-type: '4'\n",
      "clef:\n",
      "  sign: G\n",
      "  line: '2'\n",
      "\n",
      "\n",
      "### Input:\n",
      "- a00_note:\n",
      "    rest: null\n",
      "    duration: '960'\n",
      "    type: half\n",
      "  a01_harmony:\n",
      "    root:\n",
      "      root-step: E\n",
      "      root-alter: '-1'\n",
      "    kind:\n",
      "      '@text': ''\n",
      "      '#text': major\n",
      "  a02_note:\n",
      "    pitch:\n",
      "      step: E\n",
      "      alter: '-1'\n",
      "      octave: '4'\n",
      "    duration: '480'\n",
      "    type: quarter\n",
      "  a03_note:\n",
      "    pitch:\n",
      "      step: G\n",
      "      octave: '4'\n",
      "    duration: '480'\n",
      "    type: quarter\n",
      "- a00_note:\n",
      "    pitch:\n",
      "      step: A\n",
      "      alter: '-1'\n",
      "      octave: '4'\n",
      "    duration: '480'\n",
      "    type: quarter\n",
      "  a01_note:\n",
      "    pitch:\n",
      "      step: G\n",
      "      octave: '4'\n",
      "    duration: '480'\n",
      "    type: quarter\n",
      "  a02_note:\n",
      "    pitch:\n",
      "      step: F\n",
      "      octave: '4'\n",
      "    duration: '480'\n",
      "    type: quarter\n",
      "  a03_note:\n",
      "    pitch:\n",
      "      step: G\n",
      "      octave: '4'\n",
      "    duration: '480'\n",
      "    type: quarter\n",
      "- a00_note:\n",
      "    pitch:\n",
      "      step: B\n",
      "      alter: '-1'\n",
      "      octave: '3'\n",
      "    duration: '960'\n",
      "    type: half\n",
      "  a01_harmony:\n",
      "    root:\n",
      "      root-step: A\n",
      "      root-alter: '-1'\n",
      "    kind:\n",
      "      '@text': ''\n",
      "      '#text': major\n",
      "  a02_note:\n",
      "    rest: null\n",
      "    duration: '960'\n",
      "    type: half\n",
      "- a00_harmony:\n",
      "    root:\n",
      "      root-step: E\n",
      "      root-alter: '-1'\n",
      "    kind:\n",
      "      '@text': ''\n",
      "      '#text': major\n",
      "  a01_note:\n",
      "    rest: null\n",
      "    duration: '960'\n",
      "    type: half\n",
      "  a02_note:\n",
      "    pitch:\n",
      "      step: G\n",
      "      octave: '4'\n",
      "    duration: '480'\n",
      "    type: quarter\n",
      "  a03_note:\n",
      "    pitch:\n",
      "      step: G\n",
      "      octave: '4'\n",
      "    duration: '480'\n",
      "    type: quarter\n",
      "- a00_note:\n",
      "    pitch:\n",
      "      step: A\n",
      "      alter: '-1'\n",
      "      octave: '4'\n",
      "    duration: '480'\n",
      "    type: quarter\n",
      "  a01_note:\n",
      "    pitch:\n",
      "      step: G\n",
      "      octave: '4'\n",
      "    duration: '480'\n",
      "    type: quarter\n",
      "  a02_note:\n",
      "    pitch:\n",
      "      step: G\n",
      "      octave: '4'\n",
      "    duration: '480'\n",
      "    type: quarter\n",
      "  a03_note:\n",
      "    pitch:\n",
      "      step: F\n",
      "      octave: '4'\n",
      "    duration: '480'\n",
      "    type: quarter\n",
      "- a00_harmony:\n",
      "    root:\n",
      "      root-step: A\n",
      "      root-alter: '-1'\n",
      "    kind:\n",
      "      '@text': ''\n",
      "      '#text': major\n",
      "  a01_note:\n",
      "    pitch:\n",
      "      step: E\n",
      "      alter: '-1'\n",
      "      octave: '4'\n",
      "    duration: '1920'\n",
      "    tie:\n",
      "      '@type': start\n",
      "    type: whole\n",
      "- a00_note:\n",
      "    pitch:\n",
      "      step: E\n",
      "      alter: '-1'\n",
      "      octave: '4'\n",
      "    duration: '1920'\n",
      "    tie:\n",
      "      '@type': stop\n",
      "    type: whole\n",
      "- a00_harmony:\n",
      "    root:\n",
      "      root-step: F\n",
      "    kind:\n",
      "      '@text': m\n",
      "      '#text': minor\n",
      "  a01_note:\n",
      "    rest: null\n",
      "    duration: '960'\n",
      "    type: half\n",
      "  a02_note:\n",
      "    pitch:\n",
      "      step: F\n",
      "      octave: '4'\n",
      "    duration: '480'\n",
      "    type: quarter\n",
      "  a03_note:\n",
      "    pitch:\n",
      "      step: G\n",
      "      octave: '4'\n",
      "    duration: '480'\n",
      "    type: quarter\n",
      "- a00_note:\n",
      "    pitch:\n",
      "      step: A\n",
      "      alter: '-1'\n",
      "      octave: '4'\n",
      "    duration: '480'\n",
      "    type: quarter\n",
      "  a01_note:\n",
      "    pitch:\n",
      "      step: F\n",
      "      octave: '4'\n",
      "    duration: '480'\n",
      "    type: quarter\n",
      "  a02_note:\n",
      "    pitch:\n",
      "      step: F\n",
      "      octave: '4'\n",
      "    duration: '480'\n",
      "    type: quarter\n",
      "  a03_note:\n",
      "    pitch:\n",
      "      step: E\n",
      "      alter: '-1'\n",
      "      octave: '4'\n",
      "    duration: '480'\n",
      "    type: quarter\n",
      "- a00_harmony:\n",
      "    root:\n",
      "      root-step: B\n",
      "      root-alter: '-1'\n",
      "    kind:\n",
      "      '@text': '7'\n",
      "      '#text': dominant\n",
      "  a01_note:\n",
      "    pitch:\n",
      "      step: D\n",
      "      octave: '4'\n",
      "    duration: '1920'\n",
      "    tie:\n",
      "      '@type': start\n",
      "    type: whole\n",
      "\n",
      "\n",
      "### Response:\n",
      "- a00_note:\n",
      "   pitch:\n",
      "     step: D\n",
      "     octave: '4'\n",
      "   duration: '1920'\n",
      "   tie:\n",
      "     '@type': stop\n",
      "   type: whole\n",
      "- a00_harmony:\n",
      "   root:\n",
      "     root-step: E\n",
      "     root-alter: '-1'\n",
      "   kind:\n",
      "     '@text': ''\n",
      "     '#text': major\n",
      " a01_note:\n",
      "   rest: null\n",
      "   duration: '960'\n",
      "   type: half\n",
      " a02_note:\n",
      "   pitch:\n",
      "     step: E\n",
      "     alter: '-1'\n",
      "     octave: '4'\n",
      "   duration: '480'\n",
      "   type: quarter\n",
      " a03_note:\n",
      "   pitch:\n",
      "     step: G\n",
      "     octave: '4'\n",
      "   duration: '480'\n",
      "   type: quarter\n",
      "- a00_note:\n",
      "   pitch:\n",
      "     step: A\n",
      "     alter: '-1'\n",
      "     octave: '4'\n",
      "   duration: '480'\n",
      "   type: quarter\n",
      " a01_note:\n",
      "   pitch:\n",
      "     step: G\n",
      "     octave: '4'\n",
      "   duration: '480'\n",
      "   type: quarter\n",
      " a02_note:\n",
      "   pitch:\n",
      "     step: F\n",
      "     octave: '4'\n",
      "   duration: '480'\n",
      "   type: quarter\n",
      " a03_note:\n",
      "   pitch:\n",
      "     step: G\n",
      "     octave: '4'\n",
      "   duration: '480'\n",
      "   type: quarter\n",
      "- a00_note:\n",
      "   pitch:\n",
      "     step: B\n",
      "     alter: '-1'\n",
      "     octave: '3'\n",
      "   duration: '960'\n",
      "   type: half\n",
      " a01_note:\n",
      "   rest: null\n",
      "   duration: '960'\n",
      "   type: half\n",
      "- a00_harmony:\n",
      "   root:\n",
      "     root-step: A\n",
      "     root-alter: '-1'\n",
      "   kind:\n",
      "     '@text': ''\n",
      "     '#text': major\n",
      " a01_note:\n",
      "   rest: null\n",
      "Generated in 19.536226272583008 seconds.\n"
     ]
    }
   ],
   "source": [
    "# alpaca_prompt = Copied from above\n",
    "FastLanguageModel.for_inference(model) # Enable native 2x faster inference\n",
    "inputs = tokenizer(\n",
    "[\n",
    "    alpaca_prompt.format( ins, inp,\n",
    "        \"\", # output - leave this blank for generation!\n",
    "    )\n",
    "], return_tensors = \"pt\").to(\"cuda\")\n",
    "\n",
    "start_time = time.time()\n",
    "text_streamer = TextStreamer(tokenizer)\n",
    "_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 500)\n",
    "print(f\"Generated in {time.time() - start_time} seconds.\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "jp-MarkdownHeadingCollapsed": true
   },
   "source": [
    "# Pipeline Toy Example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import time\n",
    "from transformers import (\n",
    "    AutoModelForCausalLM,\n",
    "    AutoTokenizer,\n",
    "    BitsAndBytesConfig,\n",
    "    HfArgumentParser,\n",
    "    TrainingArguments,\n",
    "    pipeline,\n",
    "    logging,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# reference: https://colab.research.google.com/drive/134o_cXcMe_lsvl15ZE_4Y75Kstepsntu?usp=sharing\n",
    "def load_model(model_name):\n",
    "    # Load tokenizer and model with QLoRA configuration\n",
    "    compute_dtype = getattr(torch, \"float16\")\n",
    "\n",
    "    bnb_config = BitsAndBytesConfig(\n",
    "        load_in_4bit=True,\n",
    "        bnb_4bit_quant_type=\"float16\",\n",
    "        bnb_4bit_compute_dtype=compute_dtype,\n",
    "        bnb_4bit_use_double_quant=False,\n",
    "    )\n",
    "\n",
    "    if compute_dtype == torch.float16:\n",
    "        major, _ = torch.cuda.get_device_capability()\n",
    "        if major >= 8:\n",
    "            print(\"=\" * 80)\n",
    "            print(\"Your GPU supports bfloat16, you can accelerate training with the argument --bf16\")\n",
    "            print(\"=\" * 80)\n",
    "\n",
    "    model = AutoModelForCausalLM.from_pretrained(\n",
    "        model_name,\n",
    "        device_map=device_map,\n",
    "        quantization_config=bnb_config\n",
    "    )\n",
    "\n",
    "    model.config.use_cache = False\n",
    "    model.config.pretraining_tp = 1\n",
    "\n",
    "    # Load LoRA configuration\n",
    "    peft_config = LoraConfig(\n",
    "        lora_alpha=32,\n",
    "        lora_dropout=0,\n",
    "        r=32,\n",
    "        bias=\"none\",\n",
    "        task_type=\"CAUSAL_LM\",\n",
    "    )\n",
    "\n",
    "    # Load Tokenizer\n",
    "    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n",
    "    tokenizer.pad_token = tokenizer.eos_token\n",
    "    tokenizer.padding_side = \"right\"\n",
    "\n",
    "    return model, tokenizer, peft_config"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# reference: https://colab.research.google.com/drive/134o_cXcMe_lsvl15ZE_4Y75Kstepsntu?usp=sharing#scrollTo=XK4lTwqFflzE\n",
    " \n",
    "def text_gen_eval_wrapper(model, tokenizer, prompt, model_id=1, show_metrics=True, temp=0.7, max_new_tokens=200):\n",
    "\n",
    "    # Initialize the pipeline\n",
    "    pipe = pipeline(task=\"text-generation\",\n",
    "                    model=model,\n",
    "                    tokenizer=tokenizer,\n",
    "                    max_new_tokens=max_new_tokens,\n",
    "                    do_sample=True,\n",
    "                    temperature=temp)\n",
    "\n",
    "    # Generate text using the pipeline\n",
    "    pipe = pipeline(task=\"text-generation\",\n",
    "                    model=model,\n",
    "                    tokenizer=tokenizer,\n",
    "                    max_new_tokens=max_new_tokens)\n",
    "    result = pipe(prompt)\n",
    "    generated_text = result[0]['generated_text']\n",
    "\n",
    "\n",
    "    index = generated_text.find(\"### Response:\")\n",
    "    if index != -1:\n",
    "        \n",
    "        substring_after_assistant = generated_text[index + len(\"### Response:\"):].strip()\n",
    "    else:\n",
    "\n",
    "        substring_after_assistant = generated_text.strip()\n",
    "\n",
    "    return substring_after_assistant"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Unused kwargs: ['quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "================================================================================\n",
      "Your GPU supports bfloat16, you can accelerate training with the argument --bf16\n",
      "================================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/martin/miniconda3/envs/unsloth_env/lib/python3.10/site-packages/transformers/quantizers/auto.py:159: UserWarning: You passed `quantization_config` or equivalent parameters to `from_pretrained` but the model you're loading already has a `quantization_config` attribute. The `quantization_config` from the model will be used.\n",
      "  warnings.warn(warning_msg)\n"
     ]
    }
   ],
   "source": [
    "from peft import LoraConfig\n",
    "device_map = {\"\": 0}\n",
    "model, tokenizer, peft_config = load_model('tiny_llama_newData')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "alpaca_prompt = \"\"\"\n",
    "\n",
    "### Instruction:\n",
    "{}\n",
    "\n",
    "### Input:\n",
    "{}\n",
    "\n",
    "### Response:\n",
    "{}\"\"\"\n",
    "\n",
    "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n",
    "def formatting_prompts_func(examples):\n",
    "    instructions = examples[\"instruction\"]\n",
    "    inputs       = examples[\"input\"]\n",
    "    outputs      = examples[\"output\"]\n",
    "    texts = []\n",
    "    for instruction, input, output in zip(instructions, inputs, outputs):\n",
    "        # Must add EOS_TOKEN, otherwise your generation will go on forever!\n",
    "        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n",
    "        texts.append(text)\n",
    "    return { \"text\" : texts, }\n",
    "pass\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "ins = \"\"\"divisions: '480'\n",
    "key:\n",
    "  fifths: '-3'\n",
    "  mode: major\n",
    "time:\n",
    "  beats: '4'\n",
    "  beat-type: '4'\n",
    "clef:\n",
    "  sign: G\n",
    "  line: '2'\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "inp = \"\"\"- a00_note:\n",
    "    rest: null\n",
    "    duration: '960'\n",
    "    type: half\n",
    "  a01_harmony:\n",
    "    root:\n",
    "      root-step: E\n",
    "      root-alter: '-1'\n",
    "    kind:\n",
    "      '@text': ''\n",
    "      '#text': major\n",
    "  a02_note:\n",
    "    pitch:\n",
    "      step: E\n",
    "      alter: '-1'\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "  a03_note:\n",
    "    pitch:\n",
    "      step: G\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "- a00_note:\n",
    "    pitch:\n",
    "      step: A\n",
    "      alter: '-1'\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "  a01_note:\n",
    "    pitch:\n",
    "      step: G\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "  a02_note:\n",
    "    pitch:\n",
    "      step: F\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "  a03_note:\n",
    "    pitch:\n",
    "      step: G\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "- a00_note:\n",
    "    pitch:\n",
    "      step: B\n",
    "      alter: '-1'\n",
    "      octave: '3'\n",
    "    duration: '960'\n",
    "    type: half\n",
    "  a01_harmony:\n",
    "    root:\n",
    "      root-step: A\n",
    "      root-alter: '-1'\n",
    "    kind:\n",
    "      '@text': ''\n",
    "      '#text': major\n",
    "  a02_note:\n",
    "    rest: null\n",
    "    duration: '960'\n",
    "    type: half\n",
    "- a00_harmony:\n",
    "    root:\n",
    "      root-step: E\n",
    "      root-alter: '-1'\n",
    "    kind:\n",
    "      '@text': ''\n",
    "      '#text': major\n",
    "  a01_note:\n",
    "    rest: null\n",
    "    duration: '960'\n",
    "    type: half\n",
    "  a02_note:\n",
    "    pitch:\n",
    "      step: G\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "  a03_note:\n",
    "    pitch:\n",
    "      step: G\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "- a00_note:\n",
    "    pitch:\n",
    "      step: A\n",
    "      alter: '-1'\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "  a01_note:\n",
    "    pitch:\n",
    "      step: G\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "  a02_note:\n",
    "    pitch:\n",
    "      step: G\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "  a03_note:\n",
    "    pitch:\n",
    "      step: F\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "- a00_harmony:\n",
    "    root:\n",
    "      root-step: A\n",
    "      root-alter: '-1'\n",
    "    kind:\n",
    "      '@text': ''\n",
    "      '#text': major\n",
    "  a01_note:\n",
    "    pitch:\n",
    "      step: E\n",
    "      alter: '-1'\n",
    "      octave: '4'\n",
    "    duration: '1920'\n",
    "    tie:\n",
    "      '@type': start\n",
    "    type: whole\n",
    "- a00_note:\n",
    "    pitch:\n",
    "      step: E\n",
    "      alter: '-1'\n",
    "      octave: '4'\n",
    "    duration: '1920'\n",
    "    tie:\n",
    "      '@type': stop\n",
    "    type: whole\n",
    "- a00_harmony:\n",
    "    root:\n",
    "      root-step: F\n",
    "    kind:\n",
    "      '@text': m\n",
    "      '#text': minor\n",
    "  a01_note:\n",
    "    rest: null\n",
    "    duration: '960'\n",
    "    type: half\n",
    "  a02_note:\n",
    "    pitch:\n",
    "      step: F\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "  a03_note:\n",
    "    pitch:\n",
    "      step: G\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "- a00_note:\n",
    "    pitch:\n",
    "      step: A\n",
    "      alter: '-1'\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "  a01_note:\n",
    "    pitch:\n",
    "      step: F\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "  a02_note:\n",
    "    pitch:\n",
    "      step: F\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "  a03_note:\n",
    "    pitch:\n",
    "      step: E\n",
    "      alter: '-1'\n",
    "      octave: '4'\n",
    "    duration: '480'\n",
    "    type: quarter\n",
    "- a00_harmony:\n",
    "    root:\n",
    "      root-step: B\n",
    "      root-alter: '-1'\n",
    "    kind:\n",
    "      '@text': '7'\n",
    "      '#text': dominant\n",
    "  a01_note:\n",
    "    pitch:\n",
    "      step: D\n",
    "      octave: '4'\n",
    "    duration: '1920'\n",
    "    tie:\n",
    "      '@type': start\n",
    "    type: whole\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"\\n\\n### Instruction:\\ndivisions: '480'\\nkey:\\n  fifths: '-3'\\n  mode: major\\ntime:\\n  beats: '4'\\n  beat-type: '4'\\nclef:\\n  sign: G\\n  line: '2'\\n\\n\\n### Input:\\n- a00_note:\\n    rest: null\\n    duration: '960'\\n    type: half\\n  a01_harmony:\\n    root:\\n      root-step: E\\n      root-alter: '-1'\\n    kind:\\n      '@text': ''\\n      '#text': major\\n  a02_note:\\n    pitch:\\n      step: E\\n      alter: '-1'\\n      octave: '4'\\n    duration: '480'\\n    type: quarter\\n  a03_note:\\n    pitch:\\n      step: G\\n      octave: '4'\\n    duration: '480'\\n    type: quarter\\n- a00_note:\\n    pitch:\\n      step: A\\n      alter: '-1'\\n      octave: '4'\\n    duration: '480'\\n    type: quarter\\n  a01_note:\\n    pitch:\\n      step: G\\n      octave: '4'\\n    duration: '480'\\n    type: quarter\\n  a02_note:\\n    pitch:\\n      step: F\\n      octave: '4'\\n    duration: '480'\\n    type: quarter\\n  a03_note:\\n    pitch:\\n      step: G\\n      octave: '4'\\n    duration: '480'\\n    type: quarter\\n- a00_note:\\n    pitch:\\n      step: B\\n      alter: '-1'\\n      octave: '3'\\n    duration: '960'\\n    type: half\\n  a01_harmony:\\n    root:\\n      root-step: A\\n      root-alter: '-1'\\n    kind:\\n      '@text': ''\\n      '#text': major\\n  a02_note:\\n    rest: null\\n    duration: '960'\\n    type: half\\n- a00_harmony:\\n    root:\\n      root-step: E\\n      root-alter: '-1'\\n    kind:\\n      '@text': ''\\n      '#text': major\\n  a01_note:\\n    rest: null\\n    duration: '960'\\n    type: half\\n  a02_note:\\n    pitch:\\n      step: G\\n      octave: '4'\\n    duration: '480'\\n    type: quarter\\n  a03_note:\\n    pitch:\\n      step: G\\n      octave: '4'\\n    duration: '480'\\n    type: quarter\\n- a00_note:\\n    pitch:\\n      step: A\\n      alter: '-1'\\n      octave: '4'\\n    duration: '480'\\n    type: quarter\\n  a01_note:\\n    pitch:\\n      step: G\\n      octave: '4'\\n    duration: '480'\\n    type: quarter\\n  a02_note:\\n    pitch:\\n      step: G\\n      octave: '4'\\n    duration: '480'\\n    type: quarter\\n  a03_note:\\n    pitch:\\n      step: F\\n      octave: '4'\\n    duration: '480'\\n    type: quarter\\n- a00_harmony:\\n    root:\\n      root-step: A\\n      root-alter: '-1'\\n    kind:\\n      '@text': ''\\n      '#text': major\\n  a01_note:\\n    pitch:\\n      step: E\\n      alter: '-1'\\n      octave: '4'\\n    duration: '1920'\\n    tie:\\n      '@type': start\\n    type: whole\\n- a00_note:\\n    pitch:\\n      step: E\\n      alter: '-1'\\n      octave: '4'\\n    duration: '1920'\\n    tie:\\n      '@type': stop\\n    type: whole\\n- a00_harmony:\\n    root:\\n      root-step: F\\n    kind:\\n      '@text': m\\n      '#text': minor\\n  a01_note:\\n    rest: null\\n    duration: '960'\\n    type: half\\n  a02_note:\\n    pitch:\\n      step: F\\n      octave: '4'\\n    duration: '480'\\n    type: quarter\\n  a03_note:\\n    pitch:\\n      step: G\\n      octave: '4'\\n    duration: '480'\\n    type: quarter\\n- a00_note:\\n    pitch:\\n      step: A\\n      alter: '-1'\\n      octave: '4'\\n    duration: '480'\\n    type: quarter\\n  a01_note:\\n    pitch:\\n      step: F\\n      octave: '4'\\n    duration: '480'\\n    type: quarter\\n  a02_note:\\n    pitch:\\n      step: F\\n      octave: '4'\\n    duration: '480'\\n    type: quarter\\n  a03_note:\\n    pitch:\\n      step: E\\n      alter: '-1'\\n      octave: '4'\\n    duration: '480'\\n    type: quarter\\n- a00_harmony:\\n    root:\\n      root-step: B\\n      root-alter: '-1'\\n    kind:\\n      '@text': '7'\\n      '#text': dominant\\n  a01_note:\\n    pitch:\\n      step: D\\n      octave: '4'\\n    duration: '1920'\\n    tie:\\n      '@type': start\\n    type: whole\\n\\n\\n### Response:\\n\""
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "message = alpaca_prompt.format( ins, inp,\n",
    "        \"\", # output - leave this blank for generation!\n",
    "    )\n",
    "message"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generated in 39.8182430267334 seconds.\n",
      "- a00_note:\n",
      "    pitch:\n",
      "      step: D\n",
      "      octave: '4'\n",
      "    duration: '1920'\n",
      "    tie:\n",
      "      '@type': stop\n",
      "    type: whole\n",
      "- a00_harmony:\n",
      "    root:\n",
      "      root-step: E\n",
      "      root-alter: '-1'\n",
      "    kind:\n",
      "      '@text': ''\n",
      "      '#text': major\n",
      "  a01_note:\n",
      "    rest: null\n",
      "    duration: '960'\n",
      "    type: half\n",
      "  a02_note:\n",
      "    pitch:\n",
      "      step: E\n",
      "      alter: '-1'\n",
      "      octave: '4'\n",
      "    duration: '480'\n",
      "    type: quarter\n",
      "  a03_note:\n",
      "    pitch:\n",
      "      step: G\n",
      "      octave: '4'\n",
      "    duration: '480'\n",
      "    type: quarter\n",
      "- a00_note:\n",
      "    pitch:\n",
      "      step: A\n",
      "      alter: '-1'\n",
      "      octave: '4'\n",
      "    duration: '480'\n",
      "    type: quarter\n",
      "  a01_note:\n",
      "    pitch:\n",
      "      step: G\n",
      "      octave: '4'\n",
      "    duration: '480'\n",
      "    type: quarter\n",
      "  a02_note:\n",
      "    pitch:\n",
      "      step: F\n",
      "      octave: '4'\n",
      "    duration: '480'\n",
      "    type: quarter\n",
      "  a03_note:\n",
      "    pitch:\n",
      "      step: G\n",
      "      octave: '4'\n",
      "    duration: '480'\n",
      "    type: quarter\n",
      "- a00_note:\n",
      "    pitch:\n",
      "      step: B\n",
      "      alter: '-1'\n",
      "      octave: '3'\n",
      "    duration: '960'\n",
      "    type: half\n",
      "  a01_note:\n",
      "    rest: null\n",
      "    duration: '960'\n",
      "    type: half\n",
      "- a00_harmony:\n",
      "    root:\n",
      "      root-step: A\n",
      "      root-alter: '-1'\n",
      "    kind:\n",
      "      '@text': ''\n",
      "      '#text': major\n",
      "  a01_note:\n",
      "    rest: null\n"
     ]
    }
   ],
   "source": [
    "start_time = time.time()\n",
    "generated_text = text_gen_eval_wrapper(model, tokenizer, message, show_metrics=False,  temp=.1, max_new_tokens=500)\n",
    "print(f\"Generated in {time.time() - start_time} seconds.\")\n",
    "print(generated_text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generated in 39.7313814163208 seconds.\n",
      "- a00_note:\n",
      "    pitch:\n",
      "      step: D\n",
      "      octave: '4'\n",
      "    duration: '1920'\n",
      "    tie:\n",
      "      '@type': stop\n",
      "    type: whole\n",
      "- a00_harmony:\n",
      "    root:\n",
      "      root-step: E\n",
      "      root-alter: '-1'\n",
      "    kind:\n",
      "      '@text': ''\n",
      "      '#text': major\n",
      "  a01_note:\n",
      "    rest: null\n",
      "    duration: '960'\n",
      "    type: half\n",
      "  a02_note:\n",
      "    pitch:\n",
      "      step: E\n",
      "      alter: '-1'\n",
      "      octave: '4'\n",
      "    duration: '480'\n",
      "    type: quarter\n",
      "  a03_note:\n",
      "    pitch:\n",
      "      step: G\n",
      "      octave: '4'\n",
      "    duration: '480'\n",
      "    type: quarter\n",
      "- a00_note:\n",
      "    pitch:\n",
      "      step: A\n",
      "      alter: '-1'\n",
      "      octave: '4'\n",
      "    duration: '480'\n",
      "    type: quarter\n",
      "  a01_note:\n",
      "    pitch:\n",
      "      step: G\n",
      "      octave: '4'\n",
      "    duration: '480'\n",
      "    type: quarter\n",
      "  a02_note:\n",
      "    pitch:\n",
      "      step: F\n",
      "      octave: '4'\n",
      "    duration: '480'\n",
      "    type: quarter\n",
      "  a03_note:\n",
      "    pitch:\n",
      "      step: G\n",
      "      octave: '4'\n",
      "    duration: '480'\n",
      "    type: quarter\n",
      "- a00_note:\n",
      "    pitch:\n",
      "      step: B\n",
      "      alter: '-1'\n",
      "      octave: '3'\n",
      "    duration: '960'\n",
      "    type: half\n",
      "  a01_note:\n",
      "    rest: null\n",
      "    duration: '960'\n",
      "    type: half\n",
      "- a00_harmony:\n",
      "    root:\n",
      "      root-step: A\n",
      "      root-alter: '-1'\n",
      "    kind:\n",
      "      '@text': ''\n",
      "      '#text': major\n",
      "  a01_note:\n",
      "    rest: null\n"
     ]
    }
   ],
   "source": [
    "start_time = time.time()\n",
    "generated_text = text_gen_eval_wrapper(model, tokenizer, message, show_metrics=False, max_new_tokens=500)\n",
    "print(f\"Generated in {time.time() - start_time} seconds.\")\n",
    "print(generated_text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "gpuType": "T4",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  },
  "widgets": {
   "application/vnd.jupyter.widget-state+json": {
    "014479d51b95488da6dbdbfa3e7adce3": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_1b01f784c04e4d11a2afd72284646138",
      "placeholder": "​",
      "style": "IPY_MODEL_5f7db89c6f0b4c968dd8f43e23f54365",
      "value": "Map (num_proc=2): 100%"
     }
    },
    "08a476cadd42469eb2234ac503f8447e": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "09a4c26458f24fc49aadae2fee60acc4": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "0c2631134e6248459fdfe8f48205df25": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "0df7c15449ed45edb9094e5d58d0731d": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "0ec293e756104409b563d3e561ec2937": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "0f36928c39f641618e89a27445c5cbc2": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_7d03bcf25c774fed864b09770cfe74ea",
      "placeholder": "​",
      "style": "IPY_MODEL_09a4c26458f24fc49aadae2fee60acc4",
      "value": "special_tokens_map.json: 100%"
     }
    },
    "10c75c3affac4c839a065c5d07ff7fd3": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HBoxModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HBoxModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HBoxView",
      "box_style": "",
      "children": [
       "IPY_MODEL_7d9adc9aaa6540c3b80fb2b4e02f7b66",
       "IPY_MODEL_2e24b601930a44b58d5d1f53b33315f7",
       "IPY_MODEL_bb0f6ed8b0314e119254b574de524f6f"
      ],
      "layout": "IPY_MODEL_355b4205fa0e4612954dc918c9cc6449"
     }
    },
    "126fe7d2d8cd454eb7f71bec0b9b6e01": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "FloatProgressModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "FloatProgressModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "ProgressView",
      "bar_style": "success",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_f753790028d14420b2683c2bc16dba2b",
      "max": 121,
      "min": 0,
      "orientation": "horizontal",
      "style": "IPY_MODEL_b11371e3382440219af0d82b4fae0fe8",
      "value": 121
     }
    },
    "15248beee1f042f29ba17deb3e29c90b": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_396e74209ce9460a885b2236d1fb7b97",
      "placeholder": "​",
      "style": "IPY_MODEL_a8ec27381f564d7cb6962ee503d2eae6",
      "value": "tokenizer.json: 100%"
     }
    },
    "15f5446cc9d0433281001eb149c51cdd": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "161dc482ea1f4f129f355ee4d20e8cd5": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_15f5446cc9d0433281001eb149c51cdd",
      "placeholder": "​",
      "style": "IPY_MODEL_f27dfff3b0534f558d91747a91629523",
      "value": "Downloading readme: 100%"
     }
    },
    "1a4b589eed4f4d9694cae1f69330836a": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "1b01f784c04e4d11a2afd72284646138": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "1bf51ccd8640436482357b6e06a2f850": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_8ce3a2df43bf48429e29bd73dac5e638",
      "placeholder": "​",
      "style": "IPY_MODEL_db00b79287da4c93b425d4b07b0f7b72",
      "value": " 11.6k/11.6k [00:00&lt;00:00, 475kB/s]"
     }
    },
    "1ea78447b4fc4a38aaff65f7954ae65e": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "1eef300d637145d68a660db46ca9b14a": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "208f49c8a05f4f25906fb7df2fd95d7c": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "FloatProgressModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "FloatProgressModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "ProgressView",
      "bar_style": "success",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_dd7c9e9d3fca4f0aa6893a48c92e6748",
      "max": 11610,
      "min": 0,
      "orientation": "horizontal",
      "style": "IPY_MODEL_606650950feb431e825cb7ea79c5a1d7",
      "value": 11610
     }
    },
    "2c95bc4f3f9d42be8f03bef23242af66": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "2d0462c3fd21481fa6adf8fc9df5c28f": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HBoxModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HBoxModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HBoxView",
      "box_style": "",
      "children": [
       "IPY_MODEL_15248beee1f042f29ba17deb3e29c90b",
       "IPY_MODEL_c75a509df90340e5857692400bbb8af8",
       "IPY_MODEL_e7611888e9fb47588aee414cdf0ecf27"
      ],
      "layout": "IPY_MODEL_2c95bc4f3f9d42be8f03bef23242af66"
     }
    },
    "2e24b601930a44b58d5d1f53b33315f7": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "FloatProgressModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "FloatProgressModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "ProgressView",
      "bar_style": "success",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_4bba5aefb61e4b458381bff09abbe96a",
      "max": 44307561,
      "min": 0,
      "orientation": "horizontal",
      "style": "IPY_MODEL_6f37c03d14314e59bef4ff1bc985e866",
      "value": 44307561
     }
    },
    "3018574e40ef4f5284a1832d2efb1121": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "30f2bb04d6754a54af59439cef40ff87": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "320ad99d9e91463986ca91073e3729fa": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "FloatProgressModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "FloatProgressModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "ProgressView",
      "bar_style": "success",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_08a476cadd42469eb2234ac503f8447e",
      "max": 51760,
      "min": 0,
      "orientation": "horizontal",
      "style": "IPY_MODEL_8019cfd942a0477e92cc3a34ecc826e6",
      "value": 51760
     }
    },
    "348691f1aa714d16bfc3148b39518b36": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "ProgressStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "ProgressStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "bar_color": null,
      "description_width": ""
     }
    },
    "34f78b3e7ea0400b90d48fb2809a654d": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "355b4205fa0e4612954dc918c9cc6449": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "396e74209ce9460a885b2236d1fb7b97": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "3b9a0019602a42cd8bbd80a77be5cfcd": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "3cfc4b6de45f4ff5b8f23c3b79e6503f": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_d091d3f8b01a473ea81f5f62567a6407",
      "placeholder": "​",
      "style": "IPY_MODEL_973e0a9b040f494895f60ba8e1468c6e",
      "value": " 121/121 [00:00&lt;00:00, 4.86kB/s]"
     }
    },
    "3ed36820b9244e878c6610c589d7d821": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_5557222f4157411d9250ad82827a2f8f",
      "placeholder": "​",
      "style": "IPY_MODEL_b8d26c1ec5c44e1f9c8d20bb6d4c9d1e",
      "value": " 50.9k/50.9k [00:00&lt;00:00, 1.20MB/s]"
     }
    },
    "41eaa431bdd94075b0c41899c14c3fc3": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "451a5048b6f84360aacbda342448e0da": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "FloatProgressModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "FloatProgressModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "ProgressView",
      "bar_style": "success",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_488bb3579b99411ea03d25c0e916e629",
      "max": 1140,
      "min": 0,
      "orientation": "horizontal",
      "style": "IPY_MODEL_ab53150e33004a41ab28c06833ed5aa0",
      "value": 1140
     }
    },
    "45560e601cdb484c8e9293dece7555db": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HBoxModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HBoxModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HBoxView",
      "box_style": "",
      "children": [
       "IPY_MODEL_9dd2481401cc43acb405a64e9ce9f13f",
       "IPY_MODEL_772124f5d8f54e959dff5d56d0335e4c",
       "IPY_MODEL_9e3f29ee68524e749a3dbf1ba32653b6"
      ],
      "layout": "IPY_MODEL_fd8ffc5653b94428bb3b82d3979ef54c"
     }
    },
    "467bd804fb244a298b8caaf0c3b3a3d4": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "469b8be78b924d3aa90335798775e1c6": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "488bb3579b99411ea03d25c0e916e629": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "4bba5aefb61e4b458381bff09abbe96a": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "4bf5cf0af2e441bf8d101795f8a82594": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "50bd0e7f67f94c9eab8f42493332041c": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "526e11b93f064ace9b6a255da796fdd7": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "5326096097064f4aa0650ca59ead43aa": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "5480935a6ba341bbb62cbfb7d8707fec": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "5557222f4157411d9250ad82827a2f8f": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "5b40ea812830452c8bdc46dbc0dda54f": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "ProgressStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "ProgressStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "bar_color": null,
      "description_width": ""
     }
    },
    "5b7210bdfdd547abb29abbbb26dce37e": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HBoxModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HBoxModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HBoxView",
      "box_style": "",
      "children": [
       "IPY_MODEL_adc97fcb04834c7b9417597d9de135cd",
       "IPY_MODEL_f5b2f797ddae451e8fc1dca9bece278c",
       "IPY_MODEL_6bf7fcaf8dc347daaf2f5b04fac24300"
      ],
      "layout": "IPY_MODEL_3018574e40ef4f5284a1832d2efb1121"
     }
    },
    "5c64d6e41ddc46da81e4f92f4c88e6d1": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "5e1d1523a0c34285a6ef4aa13e9cc645": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "5f7db89c6f0b4c968dd8f43e23f54365": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "5fc82503d5984fc0bfe364b2ae5ad530": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "ProgressStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "ProgressStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "bar_color": null,
      "description_width": ""
     }
    },
    "6038bb8ebaf945da8e8c470fa7d436ec": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "606650950feb431e825cb7ea79c5a1d7": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "ProgressStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "ProgressStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "bar_color": null,
      "description_width": ""
     }
    },
    "60d93f53511c4fc1adf11bacf22590cc": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "66f169549d2847e097cb9788e3e4d29c": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HBoxModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HBoxModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HBoxView",
      "box_style": "",
      "children": [
       "IPY_MODEL_a3709b260001400e85db6464542c66d3",
       "IPY_MODEL_7d7726f17a4a462a871f5a97b3d66928",
       "IPY_MODEL_3ed36820b9244e878c6610c589d7d821"
      ],
      "layout": "IPY_MODEL_eb07173547f14d428d8549e88b4f9564"
     }
    },
    "6bf7fcaf8dc347daaf2f5b04fac24300": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_8a372927fe8d4a09b4e35bdfc77a6136",
      "placeholder": "​",
      "style": "IPY_MODEL_4bf5cf0af2e441bf8d101795f8a82594",
      "value": " 51760/0 [00:00&lt;00:00, 126317.04 examples/s]"
     }
    },
    "6f37c03d14314e59bef4ff1bc985e866": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "ProgressStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "ProgressStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "bar_color": null,
      "description_width": ""
     }
    },
    "72072eb832db4889beef6f3fa5d45044": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_467bd804fb244a298b8caaf0c3b3a3d4",
      "placeholder": "​",
      "style": "IPY_MODEL_5480935a6ba341bbb62cbfb7d8707fec",
      "value": " 5.70G/5.70G [02:23&lt;00:00, 25.8MB/s]"
     }
    },
    "772124f5d8f54e959dff5d56d0335e4c": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "FloatProgressModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "FloatProgressModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "ProgressView",
      "bar_style": "success",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_3b9a0019602a42cd8bbd80a77be5cfcd",
      "max": 51760,
      "min": 0,
      "orientation": "horizontal",
      "style": "IPY_MODEL_5fc82503d5984fc0bfe364b2ae5ad530",
      "value": 51760
     }
    },
    "7734e446cfbd4e9b9f796ba662f2ab71": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "ProgressStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "ProgressStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "bar_color": null,
      "description_width": ""
     }
    },
    "7d03bcf25c774fed864b09770cfe74ea": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "7d7726f17a4a462a871f5a97b3d66928": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "FloatProgressModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "FloatProgressModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "ProgressView",
      "bar_style": "success",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_5c64d6e41ddc46da81e4f92f4c88e6d1",
      "max": 50941,
      "min": 0,
      "orientation": "horizontal",
      "style": "IPY_MODEL_b46d2111e0ee41b9b112f7c9fc65028f",
      "value": 50941
     }
    },
    "7d9adc9aaa6540c3b80fb2b4e02f7b66": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_f9e8a5f5102040f380d3287a93cfa045",
      "placeholder": "​",
      "style": "IPY_MODEL_ab3e06fd917e4cc59f5b8e275caba930",
      "value": "Downloading data: 100%"
     }
    },
    "8019cfd942a0477e92cc3a34ecc826e6": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "ProgressStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "ProgressStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "bar_color": null,
      "description_width": ""
     }
    },
    "84569f44a689473889246c2eeab5d8a1": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_5e1d1523a0c34285a6ef4aa13e9cc645",
      "placeholder": "​",
      "style": "IPY_MODEL_e4ff98a12bc1424dadd9a8dec3afb2b3",
      "value": "model.safetensors: 100%"
     }
    },
    "8516663d35be4bf48ce4c54f58188075": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "85ee709c047c49c7b7888caf9eff3a54": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HBoxModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HBoxModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HBoxView",
      "box_style": "",
      "children": [
       "IPY_MODEL_84569f44a689473889246c2eeab5d8a1",
       "IPY_MODEL_999ed6e6f8b24c5bbfd582c478f45c57",
       "IPY_MODEL_72072eb832db4889beef6f3fa5d45044"
      ],
      "layout": "IPY_MODEL_b87419aaa7cf4ed48b96ebce680bb29b"
     }
    },
    "8a372927fe8d4a09b4e35bdfc77a6136": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "8ce3a2df43bf48429e29bd73dac5e638": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "92eb4d8be9a64892ae5e92c809f568d0": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "96f9b00a55f34d4c8a4ed3c496e1ffbf": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HBoxModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HBoxModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HBoxView",
      "box_style": "",
      "children": [
       "IPY_MODEL_0f36928c39f641618e89a27445c5cbc2",
       "IPY_MODEL_d4e450ceb7da45e9b8ae82bec8295343",
       "IPY_MODEL_b997bc55dca04d0f8880447ec9f9ba31"
      ],
      "layout": "IPY_MODEL_b1a3d509b7e643fca992dd838bacc203"
     }
    },
    "973e0a9b040f494895f60ba8e1468c6e": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "980076d4a6c44020aed6d19ebfaaaa54": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_1a4b589eed4f4d9694cae1f69330836a",
      "placeholder": "​",
      "style": "IPY_MODEL_a419b86d8ff0428b88843b55cf0be657",
      "value": " 1.14k/1.14k [00:00&lt;00:00, 74.0kB/s]"
     }
    },
    "999ed6e6f8b24c5bbfd582c478f45c57": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "FloatProgressModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "FloatProgressModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "ProgressView",
      "bar_style": "success",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_92eb4d8be9a64892ae5e92c809f568d0",
      "max": 5702746405,
      "min": 0,
      "orientation": "horizontal",
      "style": "IPY_MODEL_5b40ea812830452c8bdc46dbc0dda54f",
      "value": 5702746405
     }
    },
    "9a65ad628ffc4e2d87479514be357617": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HBoxModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HBoxModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HBoxView",
      "box_style": "",
      "children": [
       "IPY_MODEL_a98617118be94b73bbd2043c5dd65621",
       "IPY_MODEL_126fe7d2d8cd454eb7f71bec0b9b6e01",
       "IPY_MODEL_3cfc4b6de45f4ff5b8f23c3b79e6503f"
      ],
      "layout": "IPY_MODEL_f50c67dc31dc48aa978909d8967c5294"
     }
    },
    "9b3a7554fbf14276b208fce9d24b5b89": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "9dd2481401cc43acb405a64e9ce9f13f": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_e288762221be4e0796631bb7b41bea56",
      "placeholder": "​",
      "style": "IPY_MODEL_f94493a870af47c98ded7a71e1614681",
      "value": "Map: 100%"
     }
    },
    "9e3f29ee68524e749a3dbf1ba32653b6": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_9b3a7554fbf14276b208fce9d24b5b89",
      "placeholder": "​",
      "style": "IPY_MODEL_c8491efcfb72456f8d629acc49dfac90",
      "value": " 51760/51760 [00:00&lt;00:00, 80588.64 examples/s]"
     }
    },
    "a3709b260001400e85db6464542c66d3": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_50bd0e7f67f94c9eab8f42493332041c",
      "placeholder": "​",
      "style": "IPY_MODEL_b41fbbe7b59b4e13b17244edba47af30",
      "value": "tokenizer_config.json: 100%"
     }
    },
    "a419b86d8ff0428b88843b55cf0be657": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "a8ec27381f564d7cb6962ee503d2eae6": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "a96216197bda4d73afdfd657567bd059": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "a98617118be94b73bbd2043c5dd65621": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_5326096097064f4aa0650ca59ead43aa",
      "placeholder": "​",
      "style": "IPY_MODEL_34f78b3e7ea0400b90d48fb2809a654d",
      "value": "generation_config.json: 100%"
     }
    },
    "ab3e06fd917e4cc59f5b8e275caba930": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "ab53150e33004a41ab28c06833ed5aa0": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "ProgressStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "ProgressStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "bar_color": null,
      "description_width": ""
     }
    },
    "acb68ec0ac33408b8d02f22cfbbe4f01": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_8516663d35be4bf48ce4c54f58188075",
      "placeholder": "​",
      "style": "IPY_MODEL_60d93f53511c4fc1adf11bacf22590cc",
      "value": " 51760/51760 [00:53&lt;00:00, 1849.32 examples/s]"
     }
    },
    "adc97fcb04834c7b9417597d9de135cd": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_6038bb8ebaf945da8e8c470fa7d436ec",
      "placeholder": "​",
      "style": "IPY_MODEL_c9b628fa9ce34162b6d04d4f191f819d",
      "value": "Generating train split: "
     }
    },
    "b11371e3382440219af0d82b4fae0fe8": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "ProgressStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "ProgressStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "bar_color": null,
      "description_width": ""
     }
    },
    "b1a3d509b7e643fca992dd838bacc203": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "b41fbbe7b59b4e13b17244edba47af30": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "b46d2111e0ee41b9b112f7c9fc65028f": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "ProgressStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "ProgressStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "bar_color": null,
      "description_width": ""
     }
    },
    "b869fd94c21146b4917ff3e60654664b": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "b87419aaa7cf4ed48b96ebce680bb29b": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "b8d26c1ec5c44e1f9c8d20bb6d4c9d1e": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "b997bc55dca04d0f8880447ec9f9ba31": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_a96216197bda4d73afdfd657567bd059",
      "placeholder": "​",
      "style": "IPY_MODEL_db420165996143ecb8b6624ef6341912",
      "value": " 301/301 [00:00&lt;00:00, 6.61kB/s]"
     }
    },
    "bb0f6ed8b0314e119254b574de524f6f": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_0c2631134e6248459fdfe8f48205df25",
      "placeholder": "​",
      "style": "IPY_MODEL_0ec293e756104409b563d3e561ec2937",
      "value": " 44.3M/44.3M [00:01&lt;00:00, 46.8MB/s]"
     }
    },
    "bbea39204f514d65a92dbe79e4d8e697": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HBoxModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HBoxModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HBoxView",
      "box_style": "",
      "children": [
       "IPY_MODEL_161dc482ea1f4f129f355ee4d20e8cd5",
       "IPY_MODEL_208f49c8a05f4f25906fb7df2fd95d7c",
       "IPY_MODEL_1bf51ccd8640436482357b6e06a2f850"
      ],
      "layout": "IPY_MODEL_1ea78447b4fc4a38aaff65f7954ae65e"
     }
    },
    "c75a509df90340e5857692400bbb8af8": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "FloatProgressModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "FloatProgressModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "ProgressView",
      "bar_style": "success",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_c7fe113e1b3b46968d97cdc6fb1dc241",
      "max": 9084463,
      "min": 0,
      "orientation": "horizontal",
      "style": "IPY_MODEL_fcd3a5f67e164712bcf7e8e7ff1f770a",
      "value": 9084463
     }
    },
    "c78e69db216844a39fac827349845f81": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HBoxModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HBoxModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HBoxView",
      "box_style": "",
      "children": [
       "IPY_MODEL_f2c23b7309ec430c9cb984ba8f511302",
       "IPY_MODEL_451a5048b6f84360aacbda342448e0da",
       "IPY_MODEL_980076d4a6c44020aed6d19ebfaaaa54"
      ],
      "layout": "IPY_MODEL_b869fd94c21146b4917ff3e60654664b"
     }
    },
    "c7fe113e1b3b46968d97cdc6fb1dc241": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "c8491efcfb72456f8d629acc49dfac90": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "c9b628fa9ce34162b6d04d4f191f819d": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "d091d3f8b01a473ea81f5f62567a6407": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "d4e450ceb7da45e9b8ae82bec8295343": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "FloatProgressModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "FloatProgressModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "ProgressView",
      "bar_style": "success",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_41eaa431bdd94075b0c41899c14c3fc3",
      "max": 301,
      "min": 0,
      "orientation": "horizontal",
      "style": "IPY_MODEL_7734e446cfbd4e9b9f796ba662f2ab71",
      "value": 301
     }
    },
    "d92aeeaaa8b74016bfcbe49cec44f0d7": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": "20px"
     }
    },
    "db00b79287da4c93b425d4b07b0f7b72": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "db420165996143ecb8b6624ef6341912": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "dd7c9e9d3fca4f0aa6893a48c92e6748": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "e288762221be4e0796631bb7b41bea56": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "e4ff98a12bc1424dadd9a8dec3afb2b3": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "e7611888e9fb47588aee414cdf0ecf27": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_1eef300d637145d68a660db46ca9b14a",
      "placeholder": "​",
      "style": "IPY_MODEL_469b8be78b924d3aa90335798775e1c6",
      "value": " 9.08M/9.08M [00:00&lt;00:00, 15.6MB/s]"
     }
    },
    "eb07173547f14d428d8549e88b4f9564": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "f07f34e9b5c649469ac667239b0fbaad": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HBoxModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HBoxModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HBoxView",
      "box_style": "",
      "children": [
       "IPY_MODEL_014479d51b95488da6dbdbfa3e7adce3",
       "IPY_MODEL_320ad99d9e91463986ca91073e3729fa",
       "IPY_MODEL_acb68ec0ac33408b8d02f22cfbbe4f01"
      ],
      "layout": "IPY_MODEL_30f2bb04d6754a54af59439cef40ff87"
     }
    },
    "f27dfff3b0534f558d91747a91629523": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "f2c23b7309ec430c9cb984ba8f511302": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_526e11b93f064ace9b6a255da796fdd7",
      "placeholder": "​",
      "style": "IPY_MODEL_0df7c15449ed45edb9094e5d58d0731d",
      "value": "config.json: 100%"
     }
    },
    "f50c67dc31dc48aa978909d8967c5294": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "f5b2f797ddae451e8fc1dca9bece278c": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "FloatProgressModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "FloatProgressModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "ProgressView",
      "bar_style": "success",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_d92aeeaaa8b74016bfcbe49cec44f0d7",
      "max": 1,
      "min": 0,
      "orientation": "horizontal",
      "style": "IPY_MODEL_348691f1aa714d16bfc3148b39518b36",
      "value": 1
     }
    },
    "f753790028d14420b2683c2bc16dba2b": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "f94493a870af47c98ded7a71e1614681": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "f9e8a5f5102040f380d3287a93cfa045": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "fcd3a5f67e164712bcf7e8e7ff1f770a": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "ProgressStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "ProgressStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "bar_color": null,
      "description_width": ""
     }
    },
    "fd8ffc5653b94428bb3b82d3979ef54c": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    }
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}