{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "id": "2eSvM9zX_2d3" }, "outputs": [], "source": [ "# import torch\n", "\n", "# !pip install wandb" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# import wandb\n", "# wandb.login()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# %env WANDB_WATCH=all\n", "# %env WANDB_SILENT=true" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# entity = \"wandb\"\n", "# wandb.init(project=\"llama-unsloth-test-project\", name = \"llama-unsloth-test-3\")" ] }, { "cell_type": "markdown", "metadata": { "jp-MarkdownHeadingCollapsed": true }, "source": [ "\n", "# test token limit at 3500 and changed attention dimension to 32" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:\n", " PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.2.2)\n", " Python 3.10.14 (you have 3.10.14)\n", " Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n", " Memory-efficient attention, SwiGLU, sparse and more won't be available.\n", " Set XFORMERS_MORE_DETAILS=1 for more details\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "==((====))== Unsloth: Fast Llama patching release 2024.4\n", " \\\\ /| GPU: NVIDIA GeForce RTX 3080. Max memory: 10.0 GB. Platform = Linux.\n", "O^O/ \\_/ \\ Pytorch: 2.2.2. CUDA = 8.6. CUDA Toolkit = 11.8.\n", "\\ / Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.\n", " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. 
These kwargs are not used in .\n" ] } ], "source": [ "from unsloth import FastLanguageModel\n", "import torch\n", "max_seq_length = 3500 \n", "dtype = None\n", "load_in_4bit = True\n", "\n", "\n", "model, tokenizer = FastLanguageModel.from_pretrained(\n", " model_name = \"unsloth/llama-3-8b-bnb-4bit\",\n", " max_seq_length = max_seq_length,\n", " dtype = dtype,\n", " load_in_4bit = load_in_4bit,\n", " # token = \n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "model = FastLanguageModel.get_peft_model(\n", " model,\n", " r = 32, \n", " target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n", " \"gate_proj\", \"up_proj\", \"down_proj\",],\n", " lora_alpha = 16,\n", " lora_dropout = 0,\n", " bias = \"none\", \n", " \n", " use_gradient_checkpointing = \"unsloth\", \n", " random_state = 3407,\n", " use_rslora = False, \n", " loftq_config = None, \n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "alpaca_prompt = \"\"\"\n", "\n", "### Instruction:\n", "{}\n", "\n", "### Input:\n", "{}\n", "\n", "### Response:\n", "{}\"\"\"\n", "\n", "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n", "def formatting_prompts_func(examples):\n", " instructions = examples[\"instruction\"]\n", " inputs = examples[\"input\"]\n", " outputs = examples[\"output\"]\n", " texts = []\n", " for instruction, input, output in zip(instructions, inputs, outputs):\n", " # Must add EOS_TOKEN, otherwise your generation will go on forever!\n", " text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n", " texts.append(text)\n", " return { \"text\" : texts, }\n", "pass\n", "\n", "from datasets import load_dataset\n", "dataset = load_dataset(\"Chord-Llama/chord_llama_dataset\", split = \"train\")\n", "dataset = dataset.map(formatting_prompts_func, batched = True,)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from trl import SFTTrainer\n", "from transformers import TrainingArguments\n", "\n", "trainer = SFTTrainer(\n", " model = model,\n", " tokenizer = tokenizer,\n", " train_dataset = dataset,\n", " dataset_text_field = \"text\",\n", " max_seq_length = max_seq_length,\n", " dataset_num_proc = 2,\n", " packing = False, # Can make training 5x faster for short sequences.\n", " args = TrainingArguments(\n", " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 4,\n", " warmup_steps = 5,\n", " max_steps = 20,\n", " learning_rate = 2e-4,\n", " fp16 = not torch.cuda.is_bf16_supported(),\n", " bf16 = torch.cuda.is_bf16_supported(),\n", " logging_steps = 1,\n", " optim = \"adamw_8bit\",\n", " weight_decay = 0.01,\n", " lr_scheduler_type = \"linear\",\n", " seed = 3407,\n", " output_dir = \"test1\",\n", " save_strategy = \"steps\",\n", " save_steps = 60,\n", " # report_to=\"wandb\",\n", " ),\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "gpu_stats = torch.cuda.get_device_properties(0)\n", "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n", "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n", "print(f\"GPU = {gpu_stats.name}. 
Max memory = {max_memory} GB.\")\n", "print(f\"{start_gpu_memory} GB of memory reserved.\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "trainer_stats = trainer.train()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Took 11 mins." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "___" ] }, { "cell_type": "markdown", "metadata": { "jp-MarkdownHeadingCollapsed": true }, "source": [ "# test change attention dimension to 64 and test new data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Because we are dealing with long contexts, and music tends to be a bit tricky in terms of similarity between the query and the set of keys, I turned on Rank-Stabilized LoRA (rsLoRA)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Also tried to turn on LoftQ to help with generalization: https://openreview.net/forum?id=LzPWWPAdY4\n", "However, it did not work. There seems to be an error with Unsloth: https://github.com/hiyouga/LLaMA-Factory/issues/3255" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "wandb: fast-sea-14 " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Crashed due to memory issues on step 13, so I brought the LoRA rank down to 32. Wandb: stellar-breeze-15 " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Actually, it might have been rsLoRA causing the memory issue. Turned it off. Wandb: wobbly-breeze-16" ] },
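{ "cell_type": "markdown", "metadata": {}, "source": [ "(Editor's note, added; not part of the original runs) rsLoRA only changes the adapter scaling factor from lora_alpha / r to lora_alpha / sqrt(r), so the effective update does not shrink as the rank r grows. A minimal sketch of that arithmetic, using this notebook's lora_alpha = 16:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Added sketch: standard LoRA scaling vs rsLoRA scaling.\n", "import math\n", "\n", "lora_alpha = 16  # same value passed to get_peft_model in this notebook\n", "for r in (8, 16, 32, 64):\n", "    plain = lora_alpha / r                  # standard LoRA: alpha / r\n", "    stabilized = lora_alpha / math.sqrt(r)  # rsLoRA: alpha / sqrt(r)\n", "    print(f\"r={r:>2}: LoRA scale = {plain:.3f}, rsLoRA scale = {stabilized:.3f}\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:\n", " PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.2.2)\n", " Python 3.10.14 (you have 3.10.14)\n", " Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n", " Memory-efficient attention, SwiGLU, sparse and more won't be available.\n", " Set XFORMERS_MORE_DETAILS=1 for more details\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "==((====))== Unsloth: Fast Llama patching release 2024.4\n", " \\\\ /| GPU: NVIDIA GeForce RTX 3080. Max memory: 10.0 GB. Platform = Linux.\n", "O^O/ \\_/ \\ Pytorch: 2.2.2. CUDA = 8.6. CUDA Toolkit = 11.8.\n", "\\ / Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.\n", " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. 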
These kwargs are not used in .\n", "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" ] } ], "source": [ "from unsloth import FastLanguageModel\n", "import torch\n", "max_seq_length = 3500 \n", "dtype = None\n", "load_in_4bit = True\n", "\n", "\n", "model, tokenizer = FastLanguageModel.from_pretrained(\n", " model_name = \"unsloth/llama-3-8b-bnb-4bit\",\n", " max_seq_length = max_seq_length,\n", " dtype = dtype,\n", " load_in_4bit = load_in_4bit,\n", " # token = \n", ")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Unsloth 2024.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.\n" ] } ], "source": [ "model = FastLanguageModel.get_peft_model(\n", " model,\n", " r = 64, \n", " target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n", " \"gate_proj\", \"up_proj\", \"down_proj\",],\n", " lora_alpha = 16,\n", " lora_dropout = 0, \n", " bias = \"none\", \n", " \n", " use_gradient_checkpointing = \"unsloth\",\n", " random_state = 3407,\n", " use_rslora = False, \n", " loftq_config = False,\n", ")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "alpaca_prompt = \"\"\"\n", "\n", "### Instruction:\n", "{}\n", "\n", "### Input:\n", "{}\n", "\n", "### Response:\n", "{}\"\"\"\n", "\n", "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n", "def formatting_prompts_func(examples):\n", " instructions = examples[\"instruction\"]\n", " inputs = examples[\"input\"]\n", " outputs = examples[\"output\"]\n", " texts = []\n", " for instruction, input, output in zip(instructions, inputs, outputs):\n", " # Must add EOS_TOKEN, otherwise your generation will go on forever!\n", " text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n", " texts.append(text)\n", " return { \"text\" : texts, }\n", "pass\n", "\n", "from datasets import load_dataset\n", "dataset = load_dataset(\"Chord-Llama/chord_llama_data_mini_train\", split = \"train\")\n", "dataset = dataset.map(formatting_prompts_func, batched = True,)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "max_steps is given, it will override any value given in num_train_epochs\n" ] } ], "source": [ "from trl import SFTTrainer\n", "from transformers import TrainingArguments\n", "\n", "trainer = SFTTrainer(\n", " model = model,\n", " tokenizer = tokenizer,\n", " train_dataset = dataset,\n", " dataset_text_field = \"text\",\n", " max_seq_length = max_seq_length,\n", " dataset_num_proc = 2,\n", " packing = False, # Can make training 5x faster for short sequences.\n", " args = TrainingArguments(\n", " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 4,\n", " warmup_steps = 5,\n", " max_steps = 20,\n", " learning_rate = 2e-4,\n", " fp16 = not torch.cuda.is_bf16_supported(),\n", " bf16 = torch.cuda.is_bf16_supported(),\n", " logging_steps = 1,\n", " optim = \"adamw_8bit\",\n", " weight_decay = 0.01,\n", " lr_scheduler_type = \"linear\",\n", " seed = 3407,\n", " output_dir = \"tests\",\n", " save_strategy = \"steps\",\n", " save_steps = 20,\n", " # report_to=\"wandb\",\n", " ),\n", ")" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": 
"stream", "text": [ "GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n", "6.117 GB of memory reserved.\n" ] } ], "source": [ "gpu_stats = torch.cuda.get_device_properties(0)\n", "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n", "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n", "print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n", "print(f\"{start_gpu_memory} GB of memory reserved.\")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n", " \\\\ /| Num examples = 1,600 | Num Epochs = 1\n", "O^O/ \\_/ \\ Batch size per device = 2 | Gradient Accumulation steps = 4\n", "\\ / Total batch size = 8 | Total steps = 20\n", " \"-____-\" Number of trainable parameters = 167,772,160\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mmartin-alvarez\u001b[0m (\u001b[33mmartin-chivo\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" ] }, { "data": { "text/html": [ "wandb version 0.17.0 is available! To upgrade, please run:\n", " $ pip install wandb --upgrade" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Tracking run with wandb version 0.16.6" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Run data is saved locally in /home/martin/cmpe258proj/wandb/run-20240508_112603-fijz686r" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Syncing run wobbly-breeze-16 to Weights & Biases (docs)
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View project at https://wandb.ai/martin-chivo/huggingface" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View run at https://wandb.ai/martin-chivo/huggingface/runs/fijz686r" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
[20/20 10:24, Epoch 0/1]\n", "Step | Training Loss\n", "1 | 0.295900\n", "2 | 0.304200\n", "3 | 0.289000\n", "4 | 0.291100\n", "5 | 0.256800\n", "6 | 0.240800\n", "7 | 0.196100\n", "8 | 0.193200\n", "9 | 0.184800\n", "10 | 0.156200\n", "11 | 0.158800\n", "12 | 0.138700\n", "13 | 0.125300\n", "14 | 0.128300\n", "15 | 0.119500\n", "16 | 0.129200\n", "17 | 0.130400\n", "18 | 0.115700\n", "19 | 0.121000\n", "20 | 0.107800\n
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "trainer_stats = trainer.train()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "666.7279 seconds used for training.\n", "11.11 minutes used for training.\n", "Peak reserved memory = 12.84 GB.\n", "Peak reserved memory for training = 6.723 GB.\n", "Peak reserved memory % of max memory = 128.4 %.\n", "Peak reserved memory for training % of max memory = 67.23 %.\n" ] } ], "source": [ "used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n", "used_memory_for_lora = round(used_memory - start_gpu_memory, 3)\n", "used_percentage = round(used_memory /max_memory*100, 3)\n", "lora_percentage = round(used_memory_for_lora/max_memory*100, 3)\n", "print(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\n", "print(f\"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.\")\n", "print(f\"Peak reserved memory = {used_memory} GB.\")\n", "print(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\n", "print(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\n", "print(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "overall by coparing the loss from before it performed worse, however, the dataset's input and output is less similar. Took 10 mins" ] }, { "cell_type": "markdown", "metadata": { "jp-MarkdownHeadingCollapsed": true }, "source": [ "# test with only 16 attention dimension and using new data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ " wandb: autumn-breeze-17" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:\n", " PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.2.2)\n", " Python 3.10.14 (you have 3.10.14)\n", " Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n", " Memory-efficient attention, SwiGLU, sparse and more won't be available.\n", " Set XFORMERS_MORE_DETAILS=1 for more details\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "==((====))== Unsloth: Fast Llama patching release 2024.4\n", " \\\\ /| GPU: NVIDIA GeForce RTX 3080. Max memory: 10.0 GB. Platform = Linux.\n", "O^O/ \\_/ \\ Pytorch: 2.2.2. CUDA = 8.6. CUDA Toolkit = 11.8.\n", "\\ / Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.\n", " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. 
These kwargs are not used in .\n", "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" ] } ], "source": [ "from unsloth import FastLanguageModel\n", "import torch\n", "max_seq_length = 3500 \n", "dtype = None\n", "load_in_4bit = True\n", "\n", "\n", "model, tokenizer = FastLanguageModel.from_pretrained(\n", " model_name = \"unsloth/llama-3-8b-bnb-4bit\",\n", " max_seq_length = max_seq_length,\n", " dtype = dtype,\n", " load_in_4bit = load_in_4bit,\n", " # token = \n", ")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Unsloth 2024.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.\n" ] } ], "source": [ "model = FastLanguageModel.get_peft_model(\n", " model,\n", " r = 16, \n", " target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n", " \"gate_proj\", \"up_proj\", \"down_proj\",],\n", " lora_alpha = 16,\n", " lora_dropout = 0, \n", " bias = \"none\", \n", " \n", " use_gradient_checkpointing = \"unsloth\",\n", " random_state = 3407,\n", " use_rslora = False, \n", " loftq_config = None, \n", ")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "alpaca_prompt = \"\"\"\n", "\n", "### Instruction:\n", "{}\n", "\n", "### Input:\n", "{}\n", "\n", "### Response:\n", "{}\"\"\"\n", "\n", "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n", "def formatting_prompts_func(examples):\n", " instructions = examples[\"instruction\"]\n", " inputs = examples[\"input\"]\n", " outputs = examples[\"output\"]\n", " texts = []\n", " for instruction, input, output in zip(instructions, inputs, outputs):\n", " # Must add EOS_TOKEN, otherwise your generation will go on forever!\n", " text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n", " texts.append(text)\n", " return { \"text\" : texts, }\n", "pass\n", "\n", "from datasets import load_dataset\n", "dataset = load_dataset(\"Chord-Llama/chord_llama_data_mini_train\", split = \"train\")\n", "dataset = dataset.map(formatting_prompts_func, batched = True,)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "max_steps is given, it will override any value given in num_train_epochs\n" ] } ], "source": [ "from trl import SFTTrainer\n", "from transformers import TrainingArguments\n", "\n", "trainer = SFTTrainer(\n", " model = model,\n", " tokenizer = tokenizer,\n", " train_dataset = dataset,\n", " dataset_text_field = \"text\",\n", " max_seq_length = max_seq_length,\n", " dataset_num_proc = 2,\n", " packing = False, # Can make training 5x faster for short sequences.\n", " args = TrainingArguments(\n", " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 4,\n", " warmup_steps = 5,\n", " max_steps = 20,\n", " learning_rate = 2e-4,\n", " fp16 = not torch.cuda.is_bf16_supported(),\n", " bf16 = torch.cuda.is_bf16_supported(),\n", " logging_steps = 1,\n", " optim = \"adamw_8bit\",\n", " weight_decay = 0.01,\n", " lr_scheduler_type = \"linear\",\n", " seed = 3407,\n", " output_dir = \"tests\",\n", " save_strategy = \"steps\",\n", " save_steps = 20,\n", " # report_to=\"wandb\",\n", " ),\n", ")" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": 
"stream", "text": [ "GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n", "5.605 GB of memory reserved.\n" ] } ], "source": [ "gpu_stats = torch.cuda.get_device_properties(0)\n", "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n", "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n", "print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n", "print(f\"{start_gpu_memory} GB of memory reserved.\")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n", " \\\\ /| Num examples = 1,600 | Num Epochs = 1\n", "O^O/ \\_/ \\ Batch size per device = 2 | Gradient Accumulation steps = 4\n", "\\ / Total batch size = 8 | Total steps = 20\n", " \"-____-\" Number of trainable parameters = 41,943,040\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mmartin-alvarez\u001b[0m (\u001b[33mmartin-chivo\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" ] }, { "data": { "text/html": [ "wandb version 0.17.0 is available! To upgrade, please run:\n", " $ pip install wandb --upgrade" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Tracking run with wandb version 0.16.6" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Run data is saved locally in /home/martin/cmpe258proj/wandb/run-20240508_114127-4m452ks0" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Syncing run autumn-breeze-17 to Weights & Biases (docs)
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View project at https://wandb.ai/martin-chivo/huggingface" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View run at https://wandb.ai/martin-chivo/huggingface/runs/4m452ks0" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "

[20/20 08:14, Epoch 0/1]\n", "Step | Training Loss\n", "1 | 0.295900\n", "2 | 0.304200\n", "3 | 0.288800\n", "4 | 0.290000\n", "5 | 0.254400\n", "6 | 0.238600\n", "7 | 0.194200\n", "8 | 0.191500\n", "9 | 0.183000\n", "10 | 0.154000\n", "11 | 0.157500\n", "12 | 0.136600\n", "13 | 0.124900\n", "14 | 0.127600\n", "15 | 0.118500\n", "16 | 0.128100\n", "17 | 0.129200\n", "18 | 0.114900\n", "19 | 0.119500\n", "20 | 0.106400\n
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "trainer_stats = trainer.train()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "534.8471 seconds used for training.\n", "8.91 minutes used for training.\n", "Peak reserved memory = 11.961 GB.\n", "Peak reserved memory for training = 6.356 GB.\n", "Peak reserved memory % of max memory = 119.61 %.\n", "Peak reserved memory for training % of max memory = 63.56 %.\n" ] } ], "source": [ "used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n", "used_memory_for_lora = round(used_memory - start_gpu_memory, 3)\n", "used_percentage = round(used_memory /max_memory*100, 3)\n", "lora_percentage = round(used_memory_for_lora/max_memory*100, 3)\n", "print(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\n", "print(f\"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.\")\n", "print(f\"Peak reserved memory = {used_memory} GB.\")\n", "print(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\n", "print(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\n", "print(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "performed almost identical within less time. took 8 mins.\n", "\n", "Definitely used less memory which allowed the GPU to not be as throttled. \n", "\n", "the model did not save because I made a mistake with the step number for saves" ] }, { "cell_type": "markdown", "metadata": { "jp-MarkdownHeadingCollapsed": true }, "source": [ "# test with new data and 8 attention dimension" ] }, { "cell_type": "markdown", "metadata": { "editable": true, "slideshow": { "slide_type": "" }, "tags": [] }, "source": [ "wandb: n fine-sun-18 not right its one after" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:\n", " PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.2.2)\n", " Python 3.10.14 (you have 3.10.14)\n", " Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n", " Memory-efficient attention, SwiGLU, sparse and more won't be available.\n", " Set XFORMERS_MORE_DETAILS=1 for more details\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "==((====))== Unsloth: Fast Llama patching release 2024.4\n", " \\\\ /| GPU: NVIDIA GeForce RTX 3080. Max memory: 10.0 GB. Platform = Linux.\n", "O^O/ \\_/ \\ Pytorch: 2.2.2. CUDA = 8.6. CUDA Toolkit = 11.8.\n", "\\ / Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.\n", " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. 
These kwargs are not used in .\n", "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" ] } ], "source": [ "from unsloth import FastLanguageModel\n", "import torch\n", "max_seq_length = 3500 \n", "dtype = None\n", "load_in_4bit = True\n", "\n", "\n", "model, tokenizer = FastLanguageModel.from_pretrained(\n", " model_name = \"unsloth/llama-3-8b-bnb-4bit\",\n", " max_seq_length = max_seq_length,\n", " dtype = dtype,\n", " load_in_4bit = load_in_4bit,\n", " # token = \n", ")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Unsloth 2024.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.\n" ] } ], "source": [ "model = FastLanguageModel.get_peft_model(\n", " model,\n", " r = 8, \n", " target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n", " \"gate_proj\", \"up_proj\", \"down_proj\",],\n", " lora_alpha = 16,\n", " lora_dropout = 0, \n", " bias = \"none\", \n", " \n", " use_gradient_checkpointing = \"unsloth\",\n", " random_state = 3407,\n", " use_rslora = False, \n", " loftq_config = None, \n", ")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "alpaca_prompt = \"\"\"\n", "\n", "### Instruction:\n", "{}\n", "\n", "### Input:\n", "{}\n", "\n", "### Response:\n", "{}\"\"\"\n", "\n", "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n", "def formatting_prompts_func(examples):\n", " instructions = examples[\"instruction\"]\n", " inputs = examples[\"input\"]\n", " outputs = examples[\"output\"]\n", " texts = []\n", " for instruction, input, output in zip(instructions, inputs, outputs):\n", " # Must add EOS_TOKEN, otherwise your generation will go on forever!\n", " text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n", " texts.append(text)\n", " return { \"text\" : texts, }\n", "pass\n", "\n", "from datasets import load_dataset\n", "dataset = load_dataset(\"Chord-Llama/chord_llama_data_mini_train\", split = \"train\")\n", "dataset = dataset.map(formatting_prompts_func, batched = True,)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "max_steps is given, it will override any value given in num_train_epochs\n" ] } ], "source": [ "from trl import SFTTrainer\n", "from transformers import TrainingArguments\n", "\n", "trainer = SFTTrainer(\n", " model = model,\n", " tokenizer = tokenizer,\n", " train_dataset = dataset,\n", " dataset_text_field = \"text\",\n", " max_seq_length = max_seq_length,\n", " dataset_num_proc = 2,\n", " packing = False, # Can make training 5x faster for short sequences.\n", " args = TrainingArguments(\n", " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 4,\n", " warmup_steps = 5,\n", " max_steps = 20,\n", " learning_rate = 2e-4,\n", " fp16 = not torch.cuda.is_bf16_supported(),\n", " bf16 = torch.cuda.is_bf16_supported(),\n", " logging_steps = 1,\n", " optim = \"adamw_8bit\",\n", " weight_decay = 0.01,\n", " lr_scheduler_type = \"linear\",\n", " seed = 3407,\n", " output_dir = \"test1\",\n", " save_strategy = \"steps\",\n", " save_steps = 20,\n", " # report_to=\"wandb\",\n", " ),\n", ")" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": 
"stream", "text": [ "GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n", "5.527 GB of memory reserved.\n" ] } ], "source": [ "gpu_stats = torch.cuda.get_device_properties(0)\n", "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n", "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n", "print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n", "print(f\"{start_gpu_memory} GB of memory reserved.\")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n", " \\\\ /| Num examples = 1,600 | Num Epochs = 1\n", "O^O/ \\_/ \\ Batch size per device = 2 | Gradient Accumulation steps = 4\n", "\\ / Total batch size = 8 | Total steps = 20\n", " \"-____-\" Number of trainable parameters = 20,971,520\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mmartin-alvarez\u001b[0m (\u001b[33mmartin-chivo\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" ] }, { "data": { "text/html": [ "wandb version 0.17.0 is available! To upgrade, please run:\n", " $ pip install wandb --upgrade" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Tracking run with wandb version 0.16.6" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Run data is saved locally in /home/martin/cmpe258proj/wandb/run-20240508_184601-sghnalif" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Syncing run pretty-frost-46 to Weights & Biases (docs)
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View project at https://wandb.ai/martin-chivo/huggingface" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View run at https://wandb.ai/martin-chivo/huggingface/runs/sghnalif" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "

[20/20 08:34, Epoch 0/1]\n", "Step | Training Loss\n", "1 | 0.295900\n", "2 | 0.304200\n", "3 | 0.288700\n", "4 | 0.289100\n", "5 | 0.252800\n", "6 | 0.237200\n", "7 | 0.192000\n", "8 | 0.190100\n", "9 | 0.180800\n", "10 | 0.152200\n", "11 | 0.156800\n", "12 | 0.136000\n", "13 | 0.124100\n", "14 | 0.127000\n", "15 | 0.118200\n", "16 | 0.127600\n", "17 | 0.128900\n", "18 | 0.114800\n", "19 | 0.119600\n", "20 | 0.106400\n
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "trainer_stats = trainer.train()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "556.5196 seconds used for training.\n", "9.28 minutes used for training.\n", "Peak reserved memory = 11.758 GB.\n", "Peak reserved memory for training = 6.231 GB.\n", "Peak reserved memory % of max memory = 117.58 %.\n", "Peak reserved memory for training % of max memory = 62.31 %.\n" ] } ], "source": [ "used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n", "used_memory_for_lora = round(used_memory - start_gpu_memory, 3)\n", "used_percentage = round(used_memory /max_memory*100, 3)\n", "lora_percentage = round(used_memory_for_lora/max_memory*100, 3)\n", "print(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\n", "print(f\"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.\")\n", "print(f\"Peak reserved memory = {used_memory} GB.\")\n", "print(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\n", "print(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\n", "print(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "less time only took 8 mins. overall performed best with 16 attention headers. the loss was better here though. " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": { "jp-MarkdownHeadingCollapsed": true }, "source": [ "# try rank stabalized LoRA on 16 attention dimension" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:\n", " PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.2.2)\n", " Python 3.10.14 (you have 3.10.14)\n", " Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n", " Memory-efficient attention, SwiGLU, sparse and more won't be available.\n", " Set XFORMERS_MORE_DETAILS=1 for more details\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "==((====))== Unsloth: Fast Llama patching release 2024.4\n", " \\\\ /| GPU: NVIDIA GeForce RTX 3080. Max memory: 10.0 GB. Platform = Linux.\n", "O^O/ \\_/ \\ Pytorch: 2.2.2. CUDA = 8.6. CUDA Toolkit = 11.8.\n", "\\ / Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.\n", " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. 
These kwargs are not used in .\n", "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" ] } ], "source": [ "from unsloth import FastLanguageModel\n", "import torch\n", "max_seq_length = 3500 \n", "dtype = None\n", "load_in_4bit = True\n", "\n", "\n", "model, tokenizer = FastLanguageModel.from_pretrained(\n", " model_name = \"unsloth/llama-3-8b-bnb-4bit\",\n", " max_seq_length = max_seq_length,\n", " dtype = dtype,\n", " load_in_4bit = load_in_4bit,\n", " # token = \n", ")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Unsloth 2024.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.\n" ] } ], "source": [ "model = FastLanguageModel.get_peft_model(\n", " model,\n", " r = 16, \n", " target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n", " \"gate_proj\", \"up_proj\", \"down_proj\",],\n", " lora_alpha = 16,\n", " lora_dropout = 0, \n", " bias = \"none\", \n", " \n", " use_gradient_checkpointing = \"unsloth\",\n", " random_state = 3407,\n", " use_rslora = True, \n", " loftq_config = None, \n", ")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "alpaca_prompt = \"\"\"\n", "\n", "### Instruction:\n", "{}\n", "\n", "### Input:\n", "{}\n", "\n", "### Response:\n", "{}\"\"\"\n", "\n", "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n", "def formatting_prompts_func(examples):\n", " instructions = examples[\"instruction\"]\n", " inputs = examples[\"input\"]\n", " outputs = examples[\"output\"]\n", " texts = []\n", " for instruction, input, output in zip(instructions, inputs, outputs):\n", " # Must add EOS_TOKEN, otherwise your generation will go on forever!\n", " text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n", " texts.append(text)\n", " return { \"text\" : texts, }\n", "pass\n", "\n", "from datasets import load_dataset\n", "dataset = load_dataset(\"Chord-Llama/chord_llama_data_mini_train\", split = \"train\")\n", "dataset = dataset.map(formatting_prompts_func, batched = True,)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "max_steps is given, it will override any value given in num_train_epochs\n" ] } ], "source": [ "from trl import SFTTrainer\n", "from transformers import TrainingArguments\n", "\n", "trainer = SFTTrainer(\n", " model = model,\n", " tokenizer = tokenizer,\n", " train_dataset = dataset,\n", " dataset_text_field = \"text\",\n", " max_seq_length = max_seq_length,\n", " dataset_num_proc = 2,\n", " packing = False, # Can make training 5x faster for short sequences.\n", " args = TrainingArguments(\n", " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 4,\n", " warmup_steps = 5,\n", " max_steps = 20,\n", " learning_rate = 2e-4,\n", " fp16 = not torch.cuda.is_bf16_supported(),\n", " bf16 = torch.cuda.is_bf16_supported(),\n", " logging_steps = 1,\n", " optim = \"adamw_8bit\",\n", " weight_decay = 0.01,\n", " lr_scheduler_type = \"linear\",\n", " seed = 3407,\n", " output_dir = \"trank\",\n", " save_strategy = \"steps\",\n", " save_steps = 20,\n", " # report_to=\"wandb\",\n", " ),\n", ")" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": 
"stream", "text": [ "GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n", "5.605 GB of memory reserved.\n" ] } ], "source": [ "gpu_stats = torch.cuda.get_device_properties(0)\n", "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n", "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n", "print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n", "print(f\"{start_gpu_memory} GB of memory reserved.\")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n", " \\\\ /| Num examples = 93,742 | Num Epochs = 1\n", "O^O/ \\_/ \\ Batch size per device = 2 | Gradient Accumulation steps = 4\n", "\\ / Total batch size = 8 | Total steps = 20\n", " \"-____-\" Number of trainable parameters = 41,943,040\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mmartin-alvarez\u001b[0m (\u001b[33mmartin-chivo\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" ] }, { "data": { "text/html": [ "wandb version 0.17.0 is available! To upgrade, please run:\n", " $ pip install wandb --upgrade" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Tracking run with wandb version 0.16.6" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Run data is saved locally in /home/martin/cmpe258proj/wandb/run-20240508_131246-4uu0wyna" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Syncing run dandy-vortex-21 to Weights & Biases (docs)
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View project at https://wandb.ai/martin-chivo/huggingface" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View run at https://wandb.ai/martin-chivo/huggingface/runs/4uu0wyna" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "

[20/20 09:01, Epoch 0/1]\n", "Step | Training Loss\n", "1 | 0.253800\n", "2 | 0.285200\n", "3 | 0.373500\n", "4 | 0.193400\n", "5 | 0.154800\n", "6 | 0.163400\n", "7 | 0.139200\n", "8 | 0.137600\n", "9 | 0.135000\n", "10 | 0.168600\n", "11 | 0.107500\n", "12 | 0.103400\n", "13 | 0.105700\n", "14 | 0.115200\n", "15 | 0.113900\n", "16 | 0.098000\n", "17 | 0.125100\n", "18 | 0.116600\n", "19 | 0.114100\n", "20 | 0.082700\n
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "trainer_stats = trainer.train()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "580.8442 seconds used for training.\n", "9.68 minutes used for training.\n", "Peak reserved memory = 13.453 GB.\n", "Peak reserved memory for training = 7.848 GB.\n", "Peak reserved memory % of max memory = 134.53 %.\n", "Peak reserved memory for training % of max memory = 78.48 %.\n" ] } ], "source": [ "used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n", "used_memory_for_lora = round(used_memory - start_gpu_memory, 3)\n", "used_percentage = round(used_memory /max_memory*100, 3)\n", "lora_percentage = round(used_memory_for_lora/max_memory*100, 3)\n", "print(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\n", "print(f\"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.\")\n", "print(f\"Peak reserved memory = {used_memory} GB.\")\n", "print(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\n", "print(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\n", "print(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "it worked with 16 header nodes. and it performed better than before " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": { "jp-MarkdownHeadingCollapsed": true }, "source": [ "# try rank stabalized LoRA on 8 attention dimension" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:\n", " PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.2.2)\n", " Python 3.10.14 (you have 3.10.14)\n", " Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n", " Memory-efficient attention, SwiGLU, sparse and more won't be available.\n", " Set XFORMERS_MORE_DETAILS=1 for more details\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "==((====))== Unsloth: Fast Llama patching release 2024.4\n", " \\\\ /| GPU: NVIDIA GeForce RTX 3080. Max memory: 10.0 GB. Platform = Linux.\n", "O^O/ \\_/ \\ Pytorch: 2.2.2. CUDA = 8.6. CUDA Toolkit = 11.8.\n", "\\ / Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.\n", " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. 
These kwargs are not used in .\n", "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" ] } ], "source": [ "from unsloth import FastLanguageModel\n", "import torch\n", "max_seq_length = 3500 \n", "dtype = None\n", "load_in_4bit = True\n", "\n", "\n", "model, tokenizer = FastLanguageModel.from_pretrained(\n", " model_name = \"unsloth/llama-3-8b-bnb-4bit\",\n", " max_seq_length = max_seq_length,\n", " dtype = dtype,\n", " load_in_4bit = load_in_4bit,\n", " # token = \n", ")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Unsloth 2024.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.\n" ] } ], "source": [ "model = FastLanguageModel.get_peft_model(\n", " model,\n", " r = 8, \n", " target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n", " \"gate_proj\", \"up_proj\", \"down_proj\",],\n", " lora_alpha = 16,\n", " lora_dropout = 0, \n", " bias = \"none\", \n", " \n", " use_gradient_checkpointing = \"unsloth\",\n", " random_state = 3407,\n", " use_rslora = True,\n", " loftq_config = None, \n", ")" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "alpaca_prompt = \"\"\"\n", "\n", "### Instruction:\n", "{}\n", "\n", "### Input:\n", "{}\n", "\n", "### Response:\n", "{}\"\"\"\n", "\n", "EOS_TOKEN = tokenizer.eos_token\n", "def formatting_prompts_func(examples):\n", " instructions = examples[\"instruction\"]\n", " inputs = examples[\"input\"]\n", " outputs = examples[\"output\"]\n", " texts = []\n", " for instruction, input, output in zip(instructions, inputs, outputs):\n", " text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n", " texts.append(text)\n", " return { \"text\" : texts, }\n", "pass\n", "\n", "from datasets import load_dataset\n", "dataset = load_dataset(\"Chord-Llama/chord_llama_dataset\", split = \"train\")\n", "dataset = dataset.map(formatting_prompts_func, batched = True,)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "max_steps is given, it will override any value given in num_train_epochs\n" ] } ], "source": [ "from trl import SFTTrainer\n", "from transformers import TrainingArguments\n", "\n", "trainer = SFTTrainer(\n", " model = model,\n", " tokenizer = tokenizer,\n", " train_dataset = dataset,\n", " dataset_text_field = \"text\",\n", " max_seq_length = max_seq_length,\n", " dataset_num_proc = 2,\n", " packing = False,\n", " args = TrainingArguments(\n", " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 4,\n", " warmup_steps = 5,\n", " max_steps = 20,\n", " learning_rate = 2e-4,\n", " fp16 = not torch.cuda.is_bf16_supported(),\n", " bf16 = torch.cuda.is_bf16_supported(),\n", " logging_steps = 1,\n", " optim = \"adamw_8bit\",\n", " weight_decay = 0.01,\n", " lr_scheduler_type = \"linear\",\n", " seed = 3407,\n", " output_dir = \"trank8\",\n", " save_strategy = \"steps\",\n", " save_steps = 20,\n", " # report_to=\"wandb\",\n", " ),\n", ")" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GPU = NVIDIA GeForce RTX 3080. 
Max memory = 10.0 GB.\n", "5.527 GB of memory reserved.\n" ] } ], "source": [ "gpu_stats = torch.cuda.get_device_properties(0)\n", "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n", "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n", "print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n", "print(f\"{start_gpu_memory} GB of memory reserved.\")" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n", " \\\\ /| Num examples = 93,742 | Num Epochs = 1\n", "O^O/ \\_/ \\ Batch size per device = 2 | Gradient Accumulation steps = 4\n", "\\ / Total batch size = 8 | Total steps = 20\n", " \"-____-\" Number of trainable parameters = 20,971,520\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mmartin-alvarez\u001b[0m (\u001b[33mmartin-chivo\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" ] }, { "data": { "text/html": [ "wandb version 0.17.0 is available! To upgrade, please run:\n", " $ pip install wandb --upgrade" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Tracking run with wandb version 0.16.6" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Run data is saved locally in /home/martin/cmpe258proj/wandb/run-20240508_152310-xma0riok" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Syncing run apricot-universe-22 to Weights & Biases (docs)
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View project at https://wandb.ai/martin-chivo/huggingface" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View run at https://wandb.ai/martin-chivo/huggingface/runs/xma0riok" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "

[20/20 09:47, Epoch 0/1]\n", "Step | Training Loss\n", "1 | 0.253800\n", "2 | 0.285200\n", "3 | 0.381300\n", "4 | 0.200500\n", "5 | 0.158900\n", "6 | 0.168500\n", "7 | 0.133300\n", "8 | 0.131100\n", "9 | 0.137000\n", "10 | 0.170900\n", "11 | 0.104100\n", "12 | 0.103700\n", "13 | 0.105500\n", "14 | 0.114200\n", "15 | 0.112900\n", "16 | 0.097900\n", "17 | 0.126100\n", "18 | 0.116800\n", "19 | 0.114700\n", "20 | 0.082200\n
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "trainer_stats = trainer.train()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "633.1061 seconds used for training.\n", "10.55 minutes used for training.\n", "Peak reserved memory = 13.252 GB.\n", "Peak reserved memory for training = 7.725 GB.\n", "Peak reserved memory % of max memory = 132.52 %.\n", "Peak reserved memory for training % of max memory = 77.25 %.\n" ] } ], "source": [ "used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n", "used_memory_for_lora = round(used_memory - start_gpu_memory, 3)\n", "used_percentage = round(used_memory /max_memory*100, 3)\n", "lora_percentage = round(used_memory_for_lora/max_memory*100, 3)\n", "print(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\n", "print(f\"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.\")\n", "print(f\"Peak reserved memory = {used_memory} GB.\")\n", "print(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\n", "print(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\n", "print(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "performed similar " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": { "jp-MarkdownHeadingCollapsed": true }, "source": [ "# stabalized LoRA rank with attention dimension set to 8 \n", "with parameters tunning per_device_train_batch_size = 1, gradient_accumulation_steps = 2," ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:\n", " PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.2.2)\n", " Python 3.10.14 (you have 3.10.14)\n", " Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n", " Memory-efficient attention, SwiGLU, sparse and more won't be available.\n", " Set XFORMERS_MORE_DETAILS=1 for more details\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "==((====))== Unsloth: Fast Llama patching release 2024.4\n", " \\\\ /| GPU: NVIDIA GeForce RTX 3080. Max memory: 10.0 GB. Platform = Linux.\n", "O^O/ \\_/ \\ Pytorch: 2.2.2. CUDA = 8.6. CUDA Toolkit = 11.8.\n", "\\ / Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.\n", " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. 
These kwargs are not used in .\n", "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" ] } ], "source": [ "from unsloth import FastLanguageModel\n", "import torch\n", "max_seq_length = 3500 \n", "dtype = None\n", "load_in_4bit = True\n", "\n", "\n", "model, tokenizer = FastLanguageModel.from_pretrained(\n", " model_name = \"unsloth/llama-3-8b-bnb-4bit\",\n", " max_seq_length = max_seq_length,\n", " dtype = dtype,\n", " load_in_4bit = load_in_4bit,\n", " # token = \n", ")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Unsloth 2024.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.\n" ] } ], "source": [ "model = FastLanguageModel.get_peft_model(\n", " model,\n", " r = 8, \n", " target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n", " \"gate_proj\", \"up_proj\", \"down_proj\",],\n", " lora_alpha = 16,\n", " lora_dropout = 0,\n", " bias = \"none\", \n", " \n", " use_gradient_checkpointing = \"unsloth\", \n", " random_state = 3407,\n", " use_rslora = True, \n", " loftq_config = False, \n", ")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "alpaca_prompt = \"\"\"\n", "\n", "### Instruction:\n", "{}\n", "\n", "### Input:\n", "{}\n", "\n", "### Response:\n", "{}\"\"\"\n", "\n", "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n", "def formatting_prompts_func(examples):\n", " instructions = examples[\"instruction\"]\n", " inputs = examples[\"input\"]\n", " outputs = examples[\"output\"]\n", " texts = []\n", " for instruction, input, output in zip(instructions, inputs, outputs):\n", " # Must add EOS_TOKEN, otherwise your generation will go on forever!\n", " text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n", " texts.append(text)\n", " return { \"text\" : texts, }\n", "pass\n", "\n", "from datasets import load_dataset\n", "dataset = load_dataset(\"Chord-Llama/chord_llama_dataset\", split = \"train\")\n", "dataset = dataset.map(formatting_prompts_func, batched = True,)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "max_steps is given, it will override any value given in num_train_epochs\n" ] } ], "source": [ "from trl import SFTTrainer\n", "from transformers import TrainingArguments\n", "\n", "trainer = SFTTrainer(\n", " model = model,\n", " tokenizer = tokenizer,\n", " train_dataset = dataset,\n", " dataset_text_field = \"text\",\n", " max_seq_length = max_seq_length,\n", " dataset_num_proc = 2,\n", " packing = False, # Can make training 5x faster for short sequences.\n", " args = TrainingArguments(\n", " per_device_train_batch_size = 1,\n", " gradient_accumulation_steps = 2,\n", " warmup_steps = 5,\n", " max_steps = 20,\n", " learning_rate = 2e-4,\n", " fp16 = not torch.cuda.is_bf16_supported(),\n", " bf16 = torch.cuda.is_bf16_supported(),\n", " logging_steps = 1,\n", " optim = \"adamw_8bit\",\n", " weight_decay = 0.01,\n", " lr_scheduler_type = \"linear\",\n", " seed = 3407,\n", " output_dir = \"trank8loft\",\n", " save_strategy = \"steps\",\n", " save_steps = 20,\n", " # report_to=\"wandb\",\n", " ),\n", ")" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": 
"stream", "text": [ "GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n", "5.527 GB of memory reserved.\n" ] } ], "source": [ "gpu_stats = torch.cuda.get_device_properties(0)\n", "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n", "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n", "print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n", "print(f\"{start_gpu_memory} GB of memory reserved.\")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n", " \\\\ /| Num examples = 93,742 | Num Epochs = 1\n", "O^O/ \\_/ \\ Batch size per device = 1 | Gradient Accumulation steps = 2\n", "\\ / Total batch size = 2 | Total steps = 20\n", " \"-____-\" Number of trainable parameters = 20,971,520\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mmartin-alvarez\u001b[0m (\u001b[33mmartin-chivo\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" ] }, { "data": { "text/html": [ "wandb version 0.17.0 is available! To upgrade, please run:\n", " $ pip install wandb --upgrade" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Tracking run with wandb version 0.16.6" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Run data is saved locally in /home/martin/cmpe258proj/wandb/run-20240508_155226-hhbshxde" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Syncing run curious-thunder-24 to Weights & Biases (docs)
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View project at https://wandb.ai/martin-chivo/huggingface" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View run at https://wandb.ai/martin-chivo/huggingface/runs/hhbshxde" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [20/20 01:37, Epoch 0/1]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining Loss
10.302600
20.205100
30.240800
40.227700
50.225900
60.200300
70.193000
80.194500
90.170300
100.259800
110.109600
120.151900
130.166100
140.087300
150.109800
160.092200
170.165800
180.071100
190.089200
200.081400

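{ "cell_type": "markdown", "metadata": {}, "source": [ "A quick sketch (not part of the training run): rank-stabilized LoRA (use_rslora = True) changes how the low-rank update is scaled. Standard LoRA multiplies the update by lora_alpha / r, while rsLoRA uses lora_alpha / sqrt(r), which keeps the update magnitude stable as r grows. The values below are the r and lora_alpha passed to get_peft_model in this section." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Sketch: compare the standard LoRA and rsLoRA scaling factors for this run.\n", "import math\n", "\n", "r, alpha = 8, 16\n", "print(f\"standard LoRA scale (alpha / r)  = {alpha / r:.3f}\")            # 2.000\n", "print(f\"rsLoRA scale (alpha / sqrt(r))   = {alpha / math.sqrt(r):.3f}\")  # 5.657\n" ] },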
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "trainer_stats = trainer.train()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "112.476 seconds used for training.\n", "1.87 minutes used for training.\n", "Peak reserved memory = 10.824 GB.\n", "Peak reserved memory for training = 5.297 GB.\n", "Peak reserved memory % of max memory = 108.24 %.\n", "Peak reserved memory for training % of max memory = 52.97 %.\n" ] } ], "source": [ "used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n", "used_memory_for_lora = round(used_memory - start_gpu_memory, 3)\n", "used_percentage = round(used_memory /max_memory*100, 3)\n", "lora_percentage = round(used_memory_for_lora/max_memory*100, 3)\n", "print(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\n", "print(f\"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.\")\n", "print(f\"Peak reserved memory = {used_memory} GB.\")\n", "print(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\n", "print(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\n", "print(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "per_device_train_batch_size = 1,\n", " gradient_accumulation_steps = 2,\n", "\n", "This was the best one yet! super quick and used the least amount of memory. \n", "\n", "wandb: curious-thunder-24 \n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": { "jp-MarkdownHeadingCollapsed": true }, "source": [ "# stabalized LoRA rank with attention dimension set to 8 \n", "with parameters tunning per_device_train_batch_size = 1, gradient_accumulation_steps = 4," ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:\n", " PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.2.2)\n", " Python 3.10.14 (you have 3.10.14)\n", " Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n", " Memory-efficient attention, SwiGLU, sparse and more won't be available.\n", " Set XFORMERS_MORE_DETAILS=1 for more details\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "==((====))== Unsloth: Fast Llama patching release 2024.4\n", " \\\\ /| GPU: NVIDIA GeForce RTX 3080. Max memory: 10.0 GB. Platform = Linux.\n", "O^O/ \\_/ \\ Pytorch: 2.2.2. CUDA = 8.6. CUDA Toolkit = 11.8.\n", "\\ / Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.\n", " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. 
These kwargs are not used in .\n", "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" ] } ], "source": [ "from unsloth import FastLanguageModel\n", "import torch\n", "max_seq_length = 3500 \n", "dtype = None\n", "load_in_4bit = True\n", "\n", "\n", "model, tokenizer = FastLanguageModel.from_pretrained(\n", " model_name = \"unsloth/llama-3-8b-bnb-4bit\",\n", " max_seq_length = max_seq_length,\n", " dtype = dtype,\n", " load_in_4bit = load_in_4bit,\n", " # token = \n", ")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Unsloth 2024.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.\n" ] } ], "source": [ "model = FastLanguageModel.get_peft_model(\n", " model,\n", " r = 8, \n", " target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n", " \"gate_proj\", \"up_proj\", \"down_proj\",],\n", " lora_alpha = 16,\n", " lora_dropout = 0,\n", " bias = \"none\", \n", " \n", " use_gradient_checkpointing = \"unsloth\",\n", " random_state = 3407,\n", " use_rslora = True, \n", " loftq_config = False,\n", ")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "alpaca_prompt = \"\"\"\n", "\n", "### Instruction:\n", "{}\n", "\n", "### Input:\n", "{}\n", "\n", "### Response:\n", "{}\"\"\"\n", "\n", "EOS_TOKEN = tokenizer.eos_token \n", "def formatting_prompts_func(examples):\n", " instructions = examples[\"instruction\"]\n", " inputs = examples[\"input\"]\n", " outputs = examples[\"output\"]\n", " texts = []\n", " for instruction, input, output in zip(instructions, inputs, outputs):\n", " \n", " text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n", " texts.append(text)\n", " return { \"text\" : texts, }\n", "pass\n", "\n", "from datasets import load_dataset\n", "dataset = load_dataset(\"Chord-Llama/chord_llama_dataset\", split = \"train\")\n", "dataset = dataset.map(formatting_prompts_func, batched = True,)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "max_steps is given, it will override any value given in num_train_epochs\n" ] } ], "source": [ "from trl import SFTTrainer\n", "from transformers import TrainingArguments\n", "\n", "trainer = SFTTrainer(\n", " model = model,\n", " tokenizer = tokenizer,\n", " train_dataset = dataset,\n", " dataset_text_field = \"text\",\n", " max_seq_length = max_seq_length,\n", " dataset_num_proc = 2,\n", " packing = False, # Can make training 5x faster for short sequences.\n", " args = TrainingArguments(\n", " per_device_train_batch_size = 1,\n", " gradient_accumulation_steps = 4,\n", " warmup_steps = 5,\n", " max_steps = 20,\n", " learning_rate = 2e-4,\n", " fp16 = not torch.cuda.is_bf16_supported(),\n", " bf16 = torch.cuda.is_bf16_supported(),\n", " logging_steps = 1,\n", " optim = \"adamw_8bit\",\n", " weight_decay = 0.01,\n", " lr_scheduler_type = \"linear\",\n", " seed = 3407,\n", " output_dir = \"trank8grad4batch1\",\n", " save_strategy = \"steps\",\n", " save_steps = 20,\n", " # report_to=\"wandb\",\n", " ),\n", ")" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GPU = NVIDIA GeForce RTX 3080. 
Max memory = 10.0 GB.\n", "5.527 GB of memory reserved.\n" ] } ], "source": [ "gpu_stats = torch.cuda.get_device_properties(0)\n", "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n", "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n", "print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n", "print(f\"{start_gpu_memory} GB of memory reserved.\")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n", " \\\\ /| Num examples = 93,742 | Num Epochs = 1\n", "O^O/ \\_/ \\ Batch size per device = 1 | Gradient Accumulation steps = 4\n", "\\ / Total batch size = 4 | Total steps = 20\n", " \"-____-\" Number of trainable parameters = 20,971,520\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mmartin-alvarez\u001b[0m (\u001b[33mmartin-chivo\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" ] }, { "data": { "text/html": [ "wandb version 0.17.0 is available! To upgrade, please run:\n", " $ pip install wandb --upgrade" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Tracking run with wandb version 0.16.6" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Run data is saved locally in /home/martin/cmpe258proj/wandb/run-20240508_155901-awqsiw6c" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Syncing run bright-sky-26 to Weights & Biases (docs)
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View project at https://wandb.ai/martin-chivo/huggingface" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View run at https://wandb.ai/martin-chivo/huggingface/runs/awqsiw6c" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [20/20 03:24, Epoch 0/1]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining Loss
10.253800
20.255500
30.259000
40.294500
50.357700
60.219200
70.150400
80.126700
90.132700
100.097700
110.171300
120.126300
130.101000
140.103500
150.126500
160.130200
170.112900
180.149900
190.144600
200.163100

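{ "cell_type": "markdown", "metadata": {}, "source": [ "A quick sketch (not part of the training run): with max_steps fixed at 20, raising gradient_accumulation_steps from 2 to 4 doubles the number of forward/backward passes per optimizer step, which lines up with the roughly 2x runtime seen here (112.5 s vs 224.2 s)." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Sketch: effective batch size and total micro-batches for the two settings.\n", "max_steps = 20\n", "for per_device, grad_accum in [(1, 2), (1, 4)]:\n", "    effective_batch = per_device * grad_accum\n", "    micro_batches = max_steps * grad_accum\n", "    print(f\"batch={per_device}, accum={grad_accum}: \"\n", "          f\"effective batch = {effective_batch}, micro-batches = {micro_batches}\")\n" ] },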
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "trainer_stats = trainer.train()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "224.2332 seconds used for training.\n", "3.74 minutes used for training.\n", "Peak reserved memory = 10.824 GB.\n", "Peak reserved memory for training = 5.297 GB.\n", "Peak reserved memory % of max memory = 108.24 %.\n", "Peak reserved memory for training % of max memory = 52.97 %.\n" ] } ], "source": [ "used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n", "used_memory_for_lora = round(used_memory - start_gpu_memory, 3)\n", "used_percentage = round(used_memory /max_memory*100, 3)\n", "lora_percentage = round(used_memory_for_lora/max_memory*100, 3)\n", "print(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\n", "print(f\"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.\")\n", "print(f\"Peak reserved memory = {used_memory} GB.\")\n", "print(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\n", "print(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\n", "print(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "per_device_train_batch_size = 1,\n", " gradient_accumulation_steps = 4,\n", "\n", "did not work as fast or as good. \n", "\n", "wandb: bright-sky-26 \n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": { "jp-MarkdownHeadingCollapsed": true }, "source": [ "# normal LoRA attention dimension set to 8 \n", "with parameters tunning per_device_train_batch_size = 1, gradient_accumulation_steps = 4," ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:\n", " PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.2.2)\n", " Python 3.10.14 (you have 3.10.14)\n", " Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n", " Memory-efficient attention, SwiGLU, sparse and more won't be available.\n", " Set XFORMERS_MORE_DETAILS=1 for more details\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "==((====))== Unsloth: Fast Llama patching release 2024.4\n", " \\\\ /| GPU: NVIDIA GeForce RTX 3080. Max memory: 10.0 GB. Platform = Linux.\n", "O^O/ \\_/ \\ Pytorch: 2.2.2. CUDA = 8.6. CUDA Toolkit = 11.8.\n", "\\ / Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.\n", " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. 
These kwargs are not used in .\n", "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" ] } ], "source": [ "from unsloth import FastLanguageModel\n", "import torch\n", "max_seq_length = 3500 \n", "dtype = None\n", "load_in_4bit = True\n", "\n", "\n", "model, tokenizer = FastLanguageModel.from_pretrained(\n", " model_name = \"unsloth/llama-3-8b-bnb-4bit\",\n", " max_seq_length = max_seq_length,\n", " dtype = dtype,\n", " load_in_4bit = load_in_4bit,\n", " # token = \n", ")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Unsloth 2024.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.\n" ] } ], "source": [ "model = FastLanguageModel.get_peft_model(\n", " model,\n", " r = 8, \n", " target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n", " \"gate_proj\", \"up_proj\", \"down_proj\",],\n", " lora_alpha = 16,\n", " lora_dropout = 0, \n", " bias = \"none\", \n", " \n", " use_gradient_checkpointing = \"unsloth\", \n", " random_state = 3407,\n", " use_rslora = False, \n", " loftq_config = False, \n", ")" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "alpaca_prompt = \"\"\"\n", "\n", "### Instruction:\n", "{}\n", "\n", "### Input:\n", "{}\n", "\n", "### Response:\n", "{}\"\"\"\n", "\n", "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n", "def formatting_prompts_func(examples):\n", " instructions = examples[\"instruction\"]\n", " inputs = examples[\"input\"]\n", " outputs = examples[\"output\"]\n", " texts = []\n", " for instruction, input, output in zip(instructions, inputs, outputs):\n", " # Must add EOS_TOKEN, otherwise your generation will go on forever!\n", " text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n", " texts.append(text)\n", " return { \"text\" : texts, }\n", "pass\n", "\n", "from datasets import load_dataset\n", "dataset = load_dataset(\"Chord-Llama/chord_llama_dataset\", split = \"train\")\n", "dataset = dataset.map(formatting_prompts_func, batched = True,)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "max_steps is given, it will override any value given in num_train_epochs\n" ] } ], "source": [ "from trl import SFTTrainer\n", "from transformers import TrainingArguments\n", "\n", "trainer = SFTTrainer(\n", " model = model,\n", " tokenizer = tokenizer,\n", " train_dataset = dataset,\n", " dataset_text_field = \"text\",\n", " max_seq_length = max_seq_length,\n", " dataset_num_proc = 2,\n", " packing = False, # Can make training 5x faster for short sequences.\n", " args = TrainingArguments(\n", " per_device_train_batch_size = 1,\n", " gradient_accumulation_steps = 2,\n", " warmup_steps = 5,\n", " max_steps = 20,\n", " learning_rate = 2e-4,\n", " fp16 = not torch.cuda.is_bf16_supported(),\n", " bf16 = torch.cuda.is_bf16_supported(),\n", " logging_steps = 1,\n", " optim = \"adamw_8bit\",\n", " weight_decay = 0.01,\n", " lr_scheduler_type = \"linear\",\n", " seed = 3407,\n", " output_dir = \"noRank8grad2batch1\",\n", " save_strategy = \"steps\",\n", " save_steps = 20,\n", " # report_to=\"wandb\",\n", " ),\n", ")" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", 
"output_type": "stream", "text": [ "GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n", "5.527 GB of memory reserved.\n" ] } ], "source": [ "gpu_stats = torch.cuda.get_device_properties(0)\n", "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n", "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n", "print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n", "print(f\"{start_gpu_memory} GB of memory reserved.\")" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n", " \\\\ /| Num examples = 93,742 | Num Epochs = 1\n", "O^O/ \\_/ \\ Batch size per device = 1 | Gradient Accumulation steps = 2\n", "\\ / Total batch size = 2 | Total steps = 20\n", " \"-____-\" Number of trainable parameters = 20,971,520\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mmartin-alvarez\u001b[0m (\u001b[33mmartin-chivo\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" ] }, { "data": { "text/html": [ "wandb version 0.17.0 is available! To upgrade, please run:\n", " $ pip install wandb --upgrade" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Tracking run with wandb version 0.16.6" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Run data is saved locally in /home/martin/cmpe258proj/wandb/run-20240508_161059-wmujat0b" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Syncing run northern-star-29 to Weights & Biases (docs)
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View project at https://wandb.ai/martin-chivo/huggingface" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View run at https://wandb.ai/martin-chivo/huggingface/runs/wmujat0b" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [20/20 01:43, Epoch 0/1]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining Loss
10.302600
20.205100
30.246600
40.253600
50.254900
60.213000
70.256600
80.227300
90.259000
100.312900
110.149600
120.177200
130.182900
140.097100
150.121700
160.108800
170.179700
180.077900
190.102200
200.090400

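{ "cell_type": "markdown", "metadata": {}, "source": [ "A quick sketch (not part of the training run): the \"Number of trainable parameters = 20,971,520\" line in the Unsloth banner can be reproduced by hand. Each LoRA pair adds r * (d_in + d_out) parameters per target module per layer; the shapes below are the standard Llama-3-8B dimensions (hidden 4096, GQA KV dim 1024, MLP 14336), taken from the model architecture rather than from this notebook." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Sketch: back-of-the-envelope LoRA parameter count for r = 8 on 32 layers.\n", "r = 8\n", "hidden, kv_dim, mlp = 4096, 1024, 14336\n", "module_shapes = {\n", "    \"q_proj\": (hidden, hidden), \"k_proj\": (hidden, kv_dim),\n", "    \"v_proj\": (hidden, kv_dim), \"o_proj\": (hidden, hidden),\n", "    \"gate_proj\": (hidden, mlp), \"up_proj\": (hidden, mlp),\n", "    \"down_proj\": (mlp, hidden),\n", "}\n", "per_layer = sum(r * (d_in + d_out) for d_in, d_out in module_shapes.values())\n", "print(f\"trainable LoRA params = {per_layer * 32:,}\")  # 20,971,520\n" ] },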
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "trainer_stats = trainer.train()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "113.367 seconds used for training.\n", "1.89 minutes used for training.\n", "Peak reserved memory = 10.824 GB.\n", "Peak reserved memory for training = 5.297 GB.\n", "Peak reserved memory % of max memory = 108.24 %.\n", "Peak reserved memory for training % of max memory = 52.97 %.\n" ] } ], "source": [ "used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n", "used_memory_for_lora = round(used_memory - start_gpu_memory, 3)\n", "used_percentage = round(used_memory /max_memory*100, 3)\n", "lora_percentage = round(used_memory_for_lora/max_memory*100, 3)\n", "print(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\n", "print(f\"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.\")\n", "print(f\"Peak reserved memory = {used_memory} GB.\")\n", "print(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\n", "print(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\n", "print(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "per_device_train_batch_size = 1,\n", " gradient_accumulation_steps = 2,\n", "\n", "\n", "No rank\n", "\n", "it does not perform as well. \n", "\n", "wandb: northern-star-29 " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": { "jp-MarkdownHeadingCollapsed": true }, "source": [ "# changed learning rate from 2e-4 to 4e-04" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:\n", " PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.2.2)\n", " Python 3.10.14 (you have 3.10.14)\n", " Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n", " Memory-efficient attention, SwiGLU, sparse and more won't be available.\n", " Set XFORMERS_MORE_DETAILS=1 for more details\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "==((====))== Unsloth: Fast Llama patching release 2024.4\n", " \\\\ /| GPU: NVIDIA GeForce RTX 3080. Max memory: 10.0 GB. Platform = Linux.\n", "O^O/ \\_/ \\ Pytorch: 2.2.2. CUDA = 8.6. CUDA Toolkit = 11.8.\n", "\\ / Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.\n", " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. 
These kwargs are not used in .\n", "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" ] } ], "source": [ "from unsloth import FastLanguageModel\n", "import torch\n", "max_seq_length = 3500 \n", "dtype = None\n", "load_in_4bit = True\n", "\n", "\n", "model, tokenizer = FastLanguageModel.from_pretrained(\n", " model_name = \"unsloth/llama-3-8b-bnb-4bit\",\n", " max_seq_length = max_seq_length,\n", " dtype = dtype,\n", " load_in_4bit = load_in_4bit,\n", " # token = \n", ")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Unsloth 2024.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.\n" ] } ], "source": [ "model = FastLanguageModel.get_peft_model(\n", " model,\n", " r = 8,\n", " target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n", " \"gate_proj\", \"up_proj\", \"down_proj\",],\n", " lora_alpha = 16,\n", " lora_dropout = 0,\n", " bias = \"none\", \n", " \n", " use_gradient_checkpointing = \"unsloth\", \n", " random_state = 3407,\n", " use_rslora = False, \n", " loftq_config = False, \n", ")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "alpaca_prompt = \"\"\"\n", "\n", "### Instruction:\n", "{}\n", "\n", "### Input:\n", "{}\n", "\n", "### Response:\n", "{}\"\"\"\n", "\n", "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n", "def formatting_prompts_func(examples):\n", " instructions = examples[\"instruction\"]\n", " inputs = examples[\"input\"]\n", " outputs = examples[\"output\"]\n", " texts = []\n", " for instruction, input, output in zip(instructions, inputs, outputs):\n", " # Must add EOS_TOKEN, otherwise your generation will go on forever!\n", " text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n", " texts.append(text)\n", " return { \"text\" : texts, }\n", "pass\n", "\n", "from datasets import load_dataset\n", "dataset = load_dataset(\"Chord-Llama/chord_llama_dataset\", split = \"train\")\n", "dataset = dataset.map(formatting_prompts_func, batched = True,)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "max_steps is given, it will override any value given in num_train_epochs\n" ] } ], "source": [ "from trl import SFTTrainer\n", "from transformers import TrainingArguments\n", "\n", "trainer = SFTTrainer(\n", " model = model,\n", " tokenizer = tokenizer,\n", " train_dataset = dataset,\n", " dataset_text_field = \"text\",\n", " max_seq_length = max_seq_length,\n", " dataset_num_proc = 2,\n", " packing = False, # Can make training 5x faster for short sequences.\n", " args = TrainingArguments(\n", " per_device_train_batch_size = 1,\n", " gradient_accumulation_steps = 2,\n", " warmup_steps = 5,\n", " max_steps = 20,\n", " learning_rate = 4e-4,\n", " fp16 = not torch.cuda.is_bf16_supported(),\n", " bf16 = torch.cuda.is_bf16_supported(),\n", " logging_steps = 1,\n", " optim = \"adamw_8bit\",\n", " weight_decay = 0.01,\n", " lr_scheduler_type = \"linear\",\n", " seed = 3407,\n", " output_dir = \"noRank8grad2batch1\",\n", " save_strategy = \"steps\",\n", " save_steps = 20,\n", " # report_to=\"wandb\",\n", " ),\n", ")" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": 
"stream", "text": [ "GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n", "5.527 GB of memory reserved.\n" ] } ], "source": [ "gpu_stats = torch.cuda.get_device_properties(0)\n", "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n", "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n", "print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n", "print(f\"{start_gpu_memory} GB of memory reserved.\")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n", " \\\\ /| Num examples = 93,742 | Num Epochs = 1\n", "O^O/ \\_/ \\ Batch size per device = 1 | Gradient Accumulation steps = 2\n", "\\ / Total batch size = 2 | Total steps = 20\n", " \"-____-\" Number of trainable parameters = 20,971,520\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mmartin-alvarez\u001b[0m (\u001b[33mmartin-chivo\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" ] }, { "data": { "text/html": [ "wandb version 0.17.0 is available! To upgrade, please run:\n", " $ pip install wandb --upgrade" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Tracking run with wandb version 0.16.6" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Run data is saved locally in /home/martin/cmpe258proj/wandb/run-20240508_165404-alkoysjn" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Syncing run jumping-donkey-31 to Weights & Biases (docs)
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View project at https://wandb.ai/martin-chivo/huggingface" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View run at https://wandb.ai/martin-chivo/huggingface/runs/alkoysjn" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [20/20 01:41, Epoch 0/1]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining Loss
10.302600
20.205100
30.244000
40.235200
50.231600
60.201900
70.203300
80.197600
90.170400
100.258800
110.121000
120.151900
130.172000
140.088100
150.114500
160.092900
170.171200
180.072100
190.094800
200.083800

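{ "cell_type": "markdown", "metadata": {}, "source": [ "A quick sketch (not part of the training run) of the learning-rate curve this run uses: linear warmup over 5 steps up to 4e-4, then linear decay to 0 over the remaining 15 of 20 steps. Computed directly rather than through transformers, to keep it self-contained; it mirrors the shape of the \"linear\" scheduler with warmup." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Sketch: linear schedule with warmup, using this run's settings.\n", "peak_lr, warmup, total = 4e-4, 5, 20\n", "\n", "def linear_lr(step):\n", "    if step < warmup:\n", "        return peak_lr * step / warmup\n", "    return peak_lr * (total - step) / (total - warmup)\n", "\n", "for s in (0, 5, 10, 20):\n", "    print(f\"step {s:2d}: lr = {linear_lr(s):.2e}\")\n" ] },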
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "trainer_stats = trainer.train()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "116.0351 seconds used for training.\n", "1.93 minutes used for training.\n", "Peak reserved memory = 10.824 GB.\n", "Peak reserved memory for training = 5.297 GB.\n", "Peak reserved memory % of max memory = 108.24 %.\n", "Peak reserved memory for training % of max memory = 52.97 %.\n" ] } ], "source": [ "used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n", "used_memory_for_lora = round(used_memory - start_gpu_memory, 3)\n", "used_percentage = round(used_memory /max_memory*100, 3)\n", "lora_percentage = round(used_memory_for_lora/max_memory*100, 3)\n", "print(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\n", "print(f\"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.\")\n", "print(f\"Peak reserved memory = {used_memory} GB.\")\n", "print(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\n", "print(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\n", "print(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "per_device_train_batch_size = 1,\n", " gradient_accumulation_steps = 2,\n", "\n", "\n", "learning rate: 4e-04\n", "\n", "This seems to have outperformed all. \n", "wandb: jumping-donkey-31" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": { "jp-MarkdownHeadingCollapsed": true }, "source": [ "# cosine" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:\n", " PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.2.2)\n", " Python 3.10.14 (you have 3.10.14)\n", " Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n", " Memory-efficient attention, SwiGLU, sparse and more won't be available.\n", " Set XFORMERS_MORE_DETAILS=1 for more details\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "==((====))== Unsloth: Fast Llama patching release 2024.4\n", " \\\\ /| GPU: NVIDIA GeForce RTX 3080. Max memory: 10.0 GB. Platform = Linux.\n", "O^O/ \\_/ \\ Pytorch: 2.2.2. CUDA = 8.6. CUDA Toolkit = 11.8.\n", "\\ / Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.\n", " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. 
These kwargs are not used in .\n", "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" ] } ], "source": [ "from unsloth import FastLanguageModel\n", "import torch\n", "max_seq_length = 3500 \n", "dtype = None\n", "load_in_4bit = True\n", "\n", "\n", "model, tokenizer = FastLanguageModel.from_pretrained(\n", " model_name = \"unsloth/llama-3-8b-bnb-4bit\",\n", " max_seq_length = max_seq_length,\n", " dtype = dtype,\n", " load_in_4bit = load_in_4bit,\n", " # token = \n", ")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Unsloth 2024.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.\n" ] } ], "source": [ "model = FastLanguageModel.get_peft_model(\n", " model,\n", " r = 8, \n", " target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n", " \"gate_proj\", \"up_proj\", \"down_proj\",],\n", " lora_alpha = 16,\n", " lora_dropout = 0,\n", " bias = \"none\", \n", " use_gradient_checkpointing = \"unsloth\", \n", " random_state = 3407,\n", " use_rslora = False, \n", " loftq_config = False, \n", ")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "alpaca_prompt = \"\"\"\n", "\n", "### Instruction:\n", "{}\n", "\n", "### Input:\n", "{}\n", "\n", "### Response:\n", "{}\"\"\"\n", "\n", "EOS_TOKEN = tokenizer.eos_token\n", "def formatting_prompts_func(examples):\n", " instructions = examples[\"instruction\"]\n", " inputs = examples[\"input\"]\n", " outputs = examples[\"output\"]\n", " texts = []\n", " for instruction, input, output in zip(instructions, inputs, outputs):\n", " text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n", " texts.append(text)\n", " return { \"text\" : texts, }\n", "pass\n", "\n", "from datasets import load_dataset\n", "dataset = load_dataset(\"Chord-Llama/chord_llama_dataset\", split = \"train\")\n", "dataset = dataset.map(formatting_prompts_func, batched = True,)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "max_steps is given, it will override any value given in num_train_epochs\n" ] } ], "source": [ "from trl import SFTTrainer\n", "from transformers import TrainingArguments\n", "\n", "trainer = SFTTrainer(\n", " model = model,\n", " tokenizer = tokenizer,\n", " train_dataset = dataset,\n", " dataset_text_field = \"text\",\n", " max_seq_length = max_seq_length,\n", " dataset_num_proc = 2,\n", " packing = False, # Can make training 5x faster for short sequences.\n", " args = TrainingArguments(\n", " per_device_train_batch_size = 1,\n", " gradient_accumulation_steps = 2,\n", " warmup_steps = 5,\n", " max_steps = 20,\n", " learning_rate = 4e-4,\n", " fp16 = not torch.cuda.is_bf16_supported(),\n", " bf16 = torch.cuda.is_bf16_supported(),\n", " logging_steps = 1,\n", " optim = \"adamw_8bit\",\n", " weight_decay = 0.01,\n", " lr_scheduler_type = \"cosine\",\n", " seed = 3407,\n", " output_dir = \"cosD\",\n", " save_strategy = \"steps\",\n", " save_steps = 20,\n", " # report_to=\"wandb\",\n", " ),\n", ")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GPU = NVIDIA GeForce RTX 3080. 
Max memory = 10.0 GB.\n", "5.527 GB of memory reserved.\n" ] } ], "source": [ "gpu_stats = torch.cuda.get_device_properties(0)\n", "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n", "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n", "print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n", "print(f\"{start_gpu_memory} GB of memory reserved.\")" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n", " \\\\ /| Num examples = 93,742 | Num Epochs = 1\n", "O^O/ \\_/ \\ Batch size per device = 1 | Gradient Accumulation steps = 2\n", "\\ / Total batch size = 2 | Total steps = 20\n", " \"-____-\" Number of trainable parameters = 20,971,520\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mmartin-alvarez\u001b[0m (\u001b[33mmartin-chivo\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" ] }, { "data": { "text/html": [ "wandb version 0.17.0 is available! To upgrade, please run:\n", " $ pip install wandb --upgrade" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Tracking run with wandb version 0.16.6" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Run data is saved locally in /home/martin/cmpe258proj/wandb/run-20240508_170156-r7rezfkp" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Syncing run wise-morning-32 to Weights & Biases (docs)
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View project at https://wandb.ai/martin-chivo/huggingface" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View run at https://wandb.ai/martin-chivo/huggingface/runs/r7rezfkp" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [20/20 01:51, Epoch 0/1]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining Loss
10.302600
20.205100
30.243600
40.235100
50.231700
60.202500
70.203700
80.197600
90.154000
100.270100
110.117900
120.150000
130.169500
140.087300
150.114300
160.090100
170.164900
180.072000
190.092600
200.083200

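{ "cell_type": "markdown", "metadata": {}, "source": [ "A quick sketch (not part of the training run) of the cosine schedule this run uses: the same 5-step warmup, then a half-cosine decay from the peak to 0. This mirrors the shape of the \"cosine\" scheduler option; it is computed directly here to keep it self-contained." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Sketch: cosine schedule with warmup, using this run's settings.\n", "import math\n", "\n", "peak_lr, warmup, total = 4e-4, 5, 20\n", "\n", "def cosine_lr(step):\n", "    if step < warmup:\n", "        return peak_lr * step / warmup\n", "    progress = (step - warmup) / (total - warmup)\n", "    return peak_lr * 0.5 * (1.0 + math.cos(math.pi * progress))\n", "\n", "for s in (5, 10, 15, 20):\n", "    print(f\"step {s:2d}: lr = {cosine_lr(s):.2e}\")\n" ] },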
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "trainer_stats = trainer.train()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "125.0917 seconds used for training.\n", "2.08 minutes used for training.\n", "Peak reserved memory = 10.824 GB.\n", "Peak reserved memory for training = 5.297 GB.\n", "Peak reserved memory % of max memory = 108.24 %.\n", "Peak reserved memory for training % of max memory = 52.97 %.\n" ] } ], "source": [ "used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n", "used_memory_for_lora = round(used_memory - start_gpu_memory, 3)\n", "used_percentage = round(used_memory /max_memory*100, 3)\n", "lora_percentage = round(used_memory_for_lora/max_memory*100, 3)\n", "print(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\n", "print(f\"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.\")\n", "print(f\"Peak reserved memory = {used_memory} GB.\")\n", "print(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\n", "print(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\n", "print(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "per_device_train_batch_size = 1,\n", " gradient_accumulation_steps = 2,\n", "\n", "\n", "learning rate: 4e-04 with COSINE\n", "\n", "This seems to have outperformed all. \n", "wandb: wise-morning-32" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": { "jp-MarkdownHeadingCollapsed": true }, "source": [ "# polynomial" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:\n", " PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.2.2)\n", " Python 3.10.14 (you have 3.10.14)\n", " Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n", " Memory-efficient attention, SwiGLU, sparse and more won't be available.\n", " Set XFORMERS_MORE_DETAILS=1 for more details\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "==((====))== Unsloth: Fast Llama patching release 2024.4\n", " \\\\ /| GPU: NVIDIA GeForce RTX 3080. Max memory: 10.0 GB. Platform = Linux.\n", "O^O/ \\_/ \\ Pytorch: 2.2.2. CUDA = 8.6. CUDA Toolkit = 11.8.\n", "\\ / Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.\n", " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. 
These kwargs are not used in .\n", "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" ] } ], "source": [ "from unsloth import FastLanguageModel\n", "import torch\n", "max_seq_length = 3500 \n", "dtype = None\n", "load_in_4bit = True\n", "\n", "\n", "model, tokenizer = FastLanguageModel.from_pretrained(\n", " model_name = \"unsloth/llama-3-8b-bnb-4bit\",\n", " max_seq_length = max_seq_length,\n", " dtype = dtype,\n", " load_in_4bit = load_in_4bit,\n", " # token = \n", ")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Unsloth 2024.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.\n" ] } ], "source": [ "model = FastLanguageModel.get_peft_model(\n", " model,\n", " r = 8, \n", " target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n", " \"gate_proj\", \"up_proj\", \"down_proj\",],\n", " lora_alpha = 16,\n", " lora_dropout = 0,\n", " bias = \"none\", \n", " use_gradient_checkpointing = \"unsloth\",\n", " random_state = 3407,\n", " use_rslora = False, \n", " loftq_config = False,\n", ")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "alpaca_prompt = \"\"\"\n", "\n", "### Instruction:\n", "{}\n", "\n", "### Input:\n", "{}\n", "\n", "### Response:\n", "{}\"\"\"\n", "\n", "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n", "def formatting_prompts_func(examples):\n", " instructions = examples[\"instruction\"]\n", " inputs = examples[\"input\"]\n", " outputs = examples[\"output\"]\n", " texts = []\n", " for instruction, input, output in zip(instructions, inputs, outputs):\n", " # Must add EOS_TOKEN, otherwise your generation will go on forever!\n", " text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n", " texts.append(text)\n", " return { \"text\" : texts, }\n", "pass\n", "\n", "from datasets import load_dataset\n", "dataset = load_dataset(\"Chord-Llama/chord_llama_dataset\", split = \"train\")\n", "dataset = dataset.map(formatting_prompts_func, batched = True,)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "max_steps is given, it will override any value given in num_train_epochs\n" ] } ], "source": [ "from trl import SFTTrainer\n", "from transformers import TrainingArguments\n", "\n", "trainer = SFTTrainer(\n", " model = model,\n", " tokenizer = tokenizer,\n", " train_dataset = dataset,\n", " dataset_text_field = \"text\",\n", " max_seq_length = max_seq_length,\n", " dataset_num_proc = 2,\n", " packing = False, # Can make training 5x faster for short sequences.\n", " args = TrainingArguments(\n", " per_device_train_batch_size = 1,\n", " gradient_accumulation_steps = 3,\n", " warmup_steps = 5,\n", " max_steps = 40,\n", " learning_rate = 2e-4,\n", " fp16 = not torch.cuda.is_bf16_supported(),\n", " bf16 = torch.cuda.is_bf16_supported(),\n", " logging_steps = 1,\n", " optim = \"adamw_8bit\",\n", " weight_decay = 0.01,\n", " lr_scheduler_type = \"polynomial\",\n", " seed = 3407,\n", " output_dir = \"polyD\",\n", " save_strategy = \"steps\",\n", " save_steps = 60,\n", " # report_to=\"wandb\",\n", " ),\n", ")" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": 
[ "GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n", "5.527 GB of memory reserved.\n" ] } ], "source": [ "gpu_stats = torch.cuda.get_device_properties(0)\n", "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n", "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n", "print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n", "print(f\"{start_gpu_memory} GB of memory reserved.\")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n", " \\\\ /| Num examples = 93,742 | Num Epochs = 1\n", "O^O/ \\_/ \\ Batch size per device = 1 | Gradient Accumulation steps = 3\n", "\\ / Total batch size = 3 | Total steps = 40\n", " \"-____-\" Number of trainable parameters = 20,971,520\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mmartin-alvarez\u001b[0m (\u001b[33mmartin-chivo\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" ] }, { "data": { "text/html": [ "wandb version 0.17.0 is available! To upgrade, please run:\n", " $ pip install wandb --upgrade" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Tracking run with wandb version 0.16.6" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Run data is saved locally in /home/martin/cmpe258proj/wandb/run-20240509_122040-1n4ycexy" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Syncing run snowy-oath-62 to Weights & Biases (docs)
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View project at https://wandb.ai/martin-chivo/huggingface" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View run at https://wandb.ai/martin-chivo/huggingface/runs/1n4ycexy" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [40/40 07:44, Epoch 0/1]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining Loss
10.289300
20.214500
30.267100
40.253500
50.297000
60.368800
70.305600
80.228200
90.169300
100.121800
110.159100
120.107000
130.094300
140.179100
150.137500
160.118600
170.079700
180.129500
190.127600
200.097600
210.129100
220.139200
230.084400
240.169900
250.151800
260.158100
270.124100
280.136100
290.106600
300.084100
310.083000
320.084200
330.117600
340.092500
350.107800
360.065100
370.128300
380.119500
390.145800
400.078400

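{ "cell_type": "markdown", "metadata": {}, "source": [ "A quick sketch (not part of the training run): the \"polynomial\" scheduler with its usual default power of 1.0 decays linearly from the peak down to a small floor (lr_end, commonly 1e-7); those defaults are assumptions here, since the notebook only sets lr_scheduler_type = \"polynomial\". Shown with this run's settings (learning_rate = 2e-4, 5 warmup steps, 40 total steps)." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Sketch: polynomial decay with warmup, assuming power = 1.0 and lr_end = 1e-7.\n", "peak_lr, lr_end, warmup, total, power = 2e-4, 1e-7, 5, 40, 1.0\n", "\n", "def poly_lr(step):\n", "    if step < warmup:\n", "        return peak_lr * step / warmup\n", "    remaining = 1.0 - (step - warmup) / (total - warmup)\n", "    return (peak_lr - lr_end) * remaining ** power + lr_end\n", "\n", "for s in (5, 20, 40):\n", "    print(f\"step {s:2d}: lr = {poly_lr(s):.2e}\")\n" ] },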
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "trainer_stats = trainer.train()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "479.6709 seconds used for training.\n", "7.99 minutes used for training.\n", "Peak reserved memory = 10.826 GB.\n", "Peak reserved memory for training = 5.299 GB.\n", "Peak reserved memory % of max memory = 108.26 %.\n", "Peak reserved memory for training % of max memory = 52.99 %.\n" ] } ], "source": [ "used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n", "used_memory_for_lora = round(used_memory - start_gpu_memory, 3)\n", "used_percentage = round(used_memory /max_memory*100, 3)\n", "lora_percentage = round(used_memory_for_lora/max_memory*100, 3)\n", "print(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\n", "print(f\"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.\")\n", "print(f\"Peak reserved memory = {used_memory} GB.\")\n", "print(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\n", "print(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\n", "print(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "per_device_train_batch_size = 1,\n", " gradient_accumulation_steps = 2,\n", "\n", "\n", "learning rate: 4e-04 with polynomial\n", "\n", "This seems to have outperformed all. \n", "wandb: magic-thunder-34 " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": { "jp-MarkdownHeadingCollapsed": true }, "source": [ "# Linear" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:\n", " PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.2.2)\n", " Python 3.10.14 (you have 3.10.14)\n", " Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n", " Memory-efficient attention, SwiGLU, sparse and more won't be available.\n", " Set XFORMERS_MORE_DETAILS=1 for more details\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "==((====))== Unsloth: Fast Llama patching release 2024.4\n", " \\\\ /| GPU: NVIDIA GeForce RTX 3080. Max memory: 10.0 GB. Platform = Linux.\n", "O^O/ \\_/ \\ Pytorch: 2.2.2. CUDA = 8.6. CUDA Toolkit = 11.8.\n", "\\ / Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.\n", " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. 
These kwargs are not used in .\n", "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" ] } ], "source": [ "from unsloth import FastLanguageModel\n", "import torch\n", "max_seq_length = 3500 \n", "dtype = None\n", "load_in_4bit = True\n", "\n", "\n", "model, tokenizer = FastLanguageModel.from_pretrained(\n", " model_name = \"unsloth/llama-3-8b-bnb-4bit\",\n", " max_seq_length = max_seq_length,\n", " dtype = dtype,\n", " load_in_4bit = load_in_4bit,\n", " # token = \n", ")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Unsloth 2024.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.\n" ] } ], "source": [ "model = FastLanguageModel.get_peft_model(\n", " model,\n", " r = 8, \n", " target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n", " \"gate_proj\", \"up_proj\", \"down_proj\",],\n", " lora_alpha = 16,\n", " lora_dropout = 0, \n", " bias = \"none\", \n", " use_gradient_checkpointing = \"unsloth\", \n", " random_state = 3407,\n", " use_rslora = False, \n", " loftq_config = False, \n", ")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "alpaca_prompt = \"\"\"\n", "\n", "### Instruction:\n", "{}\n", "\n", "### Input:\n", "{}\n", "\n", "### Response:\n", "{}\"\"\"\n", "\n", "EOS_TOKEN = tokenizer.eos_token \n", "def formatting_prompts_func(examples):\n", " instructions = examples[\"instruction\"]\n", " inputs = examples[\"input\"]\n", " outputs = examples[\"output\"]\n", " texts = []\n", " for instruction, input, output in zip(instructions, inputs, outputs):\n", " text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n", " texts.append(text)\n", " return { \"text\" : texts, }\n", "pass\n", "\n", "from datasets import load_dataset\n", "dataset = load_dataset(\"Chord-Llama/chord_llama_dataset\", split = \"train\")\n", "dataset = dataset.map(formatting_prompts_func, batched = True,)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "max_steps is given, it will override any value given in num_train_epochs\n" ] } ], "source": [ "from trl import SFTTrainer\n", "from transformers import TrainingArguments\n", "\n", "trainer = SFTTrainer(\n", " model = model,\n", " tokenizer = tokenizer,\n", " train_dataset = dataset,\n", " dataset_text_field = \"text\",\n", " max_seq_length = max_seq_length,\n", " dataset_num_proc = 2,\n", " packing = False, # Can make training 5x faster for short sequences.\n", " args = TrainingArguments(\n", " per_device_train_batch_size = 1,\n", " gradient_accumulation_steps = 2,\n", " warmup_steps = 5,\n", " max_steps = 20,\n", " learning_rate = 3e-4,\n", " fp16 = not torch.cuda.is_bf16_supported(),\n", " bf16 = torch.cuda.is_bf16_supported(),\n", " logging_steps = 1,\n", " optim = \"adamw_8bit\",\n", " weight_decay = 0.001,\n", " lr_scheduler_type = \"linear\",\n", " seed = 3407,\n", " output_dir = \"toyCos3_001\",\n", " save_strategy = \"steps\",\n", " save_steps = 20,\n", " # report_to=\"wandb\",\n", " ),\n", ")" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GPU = NVIDIA GeForce RTX 3080. 
Max memory = 10.0 GB.\n", "5.527 GB of memory reserved.\n" ] } ], "source": [ "gpu_stats = torch.cuda.get_device_properties(0)\n", "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n", "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n", "print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n", "print(f\"{start_gpu_memory} GB of memory reserved.\")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n", " \\\\ /| Num examples = 93,742 | Num Epochs = 1\n", "O^O/ \\_/ \\ Batch size per device = 1 | Gradient Accumulation steps = 2\n", "\\ / Total batch size = 2 | Total steps = 20\n", " \"-____-\" Number of trainable parameters = 20,971,520\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mmartin-alvarez\u001b[0m (\u001b[33mmartin-chivo\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" ] }, { "data": { "text/html": [ "wandb version 0.17.0 is available! To upgrade, please run:\n", " $ pip install wandb --upgrade" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Tracking run with wandb version 0.16.6" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Run data is saved locally in /home/martin/cmpe258proj/wandb/run-20240508_174616-6x2u7unl" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Syncing run logical-bee-39 to Weights & Biases (docs)
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View project at https://wandb.ai/martin-chivo/huggingface" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View run at https://wandb.ai/martin-chivo/huggingface/runs/6x2u7unl" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [20/20 01:38, Epoch 0/1]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining Loss
10.302600
20.205000
30.245400
40.244100
50.241600
60.201900
70.213300
80.200200
90.183300
100.282500
110.117900
120.156200
130.173400
140.088200
150.115300
160.093300
170.167500
180.073100
190.094000
200.086300

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "trainer_stats = trainer.train()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "109.386 seconds used for training.\n", "1.82 minutes used for training.\n", "Peak reserved memory = 10.824 GB.\n", "Peak reserved memory for training = 5.297 GB.\n", "Peak reserved memory % of max memory = 108.24 %.\n", "Peak reserved memory for training % of max memory = 52.97 %.\n" ] } ], "source": [ "used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n", "used_memory_for_lora = round(used_memory - start_gpu_memory, 3)\n", "used_percentage = round(used_memory /max_memory*100, 3)\n", "lora_percentage = round(used_memory_for_lora/max_memory*100, 3)\n", "print(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\n", "print(f\"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.\")\n", "print(f\"Peak reserved memory = {used_memory} GB.\")\n", "print(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\n", "print(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\n", "print(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "per_device_train_batch_size = 1,\n", " gradient_accumulation_steps = 2,\n", "\n", "COSINE\n", "\n", "learning rate: 3e-04 with linear\n", "weight_decay = 0.001,\n", "\n", "This seems to have outperformed all. it was the quickest\n", "wandb: logical-bee-39 " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": { "editable": true, "slideshow": { "slide_type": "" }, "tags": [] }, "source": [ "# Full Model Training" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:\n", " PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.2.2)\n", " Python 3.10.14 (you have 3.10.14)\n", " Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n", " Memory-efficient attention, SwiGLU, sparse and more won't be available.\n", " Set XFORMERS_MORE_DETAILS=1 for more details\n", "Unsloth: unsloth/tinyllama-bnb-4bit can only handle sequence lengths of at most 2048.\n", "But with kaiokendev's RoPE scaling of 1.709, it can be magically be extended to 3500!\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "==((====))== Unsloth: Fast Llama patching release 2024.4\n", " \\\\ /| GPU: NVIDIA GeForce RTX 3080. Max memory: 10.0 GB. Platform = Linux.\n", "O^O/ \\_/ \\ Pytorch: 2.2.2. CUDA = 8.6. CUDA Toolkit = 11.8.\n", "\\ / Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.\n", " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unused kwargs: ['quant_method']. These kwargs are not used in .\n", "You set `add_prefix_space`. 
The tokenizer needs to be converted from the slow tokenizers\n" ] } ], "source": [ "from unsloth import FastLanguageModel\n", "import torch\n", "max_seq_length = 3500 \n", "dtype = None \n", "load_in_4bit = True \n", "model, tokenizer = FastLanguageModel.from_pretrained(\n", " # model_name = \"unsloth/llama-3-8b-bnb-4bit\",\n", " model_name = \"unsloth/tinyllama-bnb-4bit\", # \"unsloth/tinyllama\" for 16bit loading\n", " max_seq_length = max_seq_length,\n", " dtype = dtype,\n", " load_in_4bit = load_in_4bit,\n", " # token = \n", ")" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Unsloth 2024.4 patched 22 layers with 22 QKV layers, 22 O layers and 22 MLP layers.\n" ] } ], "source": [ "model = FastLanguageModel.get_peft_model(\n", " model,\n", " r = 8, \n", " target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n", " \"gate_proj\", \"up_proj\", \"down_proj\",],\n", " lora_alpha = 16,\n", " lora_dropout = 0, \n", " bias = \"none\", \n", " use_gradient_checkpointing = \"unsloth\",\n", " random_state = 3407,\n", " use_rslora = False, \n", " loftq_config = False,\n", ")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "alpaca_prompt = \"\"\"\n", "\n", "### Instruction:\n", "{}\n", "\n", "### Input:\n", "{}\n", "\n", "### Response:\n", "{}\"\"\"\n", "\n", "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n", "def formatting_prompts_func(examples):\n", " instructions = examples[\"instruction\"]\n", " inputs = examples[\"input\"]\n", " outputs = examples[\"output\"]\n", " texts = []\n", " for instruction, input, output in zip(instructions, inputs, outputs):\n", " \n", " text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n", " texts.append(text)\n", " return { \"text\" : texts, }\n", "pass\n", "\n", "from datasets import load_dataset\n", "dataset = load_dataset(\"Chord-Llama/chord_llama_data_mini_train\", split = \"train\")\n", "dataset = dataset.map(formatting_prompts_func, batched = True,)\n", "\n", "# test_dataset = load_dataset(\"Chord-Llama/chord_llama_data_mini_test\", split = \"test\")\n", "# test_dataset = test_dataset.map(formatting_prompts_func, batched = True,)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "max_steps is given, it will override any value given in num_train_epochs\n", "/home/martin/miniconda3/envs/unsloth_env/lib/python3.10/site-packages/trl/trainer/sft_trainer.py:342: UserWarning: You passed `packing=True` to the SFTTrainer, and you are training your model with `max_steps` strategy. The dataset will be iterated until the `max_steps` are reached.\n", " warnings.warn(\n" ] } ], "source": [ "from trl import SFTTrainer\n", "from transformers import TrainingArguments\n", "\n", "\n", "trainer = SFTTrainer(\n", " model = model,\n", " tokenizer = tokenizer,\n", " train_dataset = dataset,\n", " # eval_dataset = test_dataset,\n", " dataset_text_field = \"text\",\n", " max_seq_length = max_seq_length,\n", " dataset_num_proc = 2,\n", " packing = True, \n", " args = TrainingArguments(\n", " per_device_train_batch_size = 1,\n", " gradient_accumulation_steps = 3,\n", " warmup_steps = 5,\n", " # warmup_ratio = 0.1,\n", " max_steps = -1,\n", " # max_steps = 20,\n", " num_train_epochs=1,\n", " learning_rate = 2e-4,\n", " #check to see if bf16 is supported. 
if it is use it!\n", " fp16 = not torch.cuda.is_bf16_supported(),\n", " bf16 = torch.cuda.is_bf16_supported(),\n", " logging_steps = 1,\n", " optim = \"adamw_8bit\",\n", " weight_decay = 0.01,\n", " lr_scheduler_type = \"cosine\",\n", " seed = 3407,\n", " output_dir = \"tinyCheckpoints\",\n", " save_strategy = \"steps\",\n", " save_steps = 100,\n", " # report_to=\"wandb\",\n", " \n", " ),\n", ")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.\n", "0.771 GB of memory reserved.\n" ] } ], "source": [ "gpu_stats = torch.cuda.get_device_properties(0)\n", "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n", "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n", "print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n", "print(f\"{start_gpu_memory} GB of memory reserved.\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "trainer_stats = trainer.train()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n", "used_memory_for_lora = round(used_memory - start_gpu_memory, 3)\n", "used_percentage = round(used_memory /max_memory*100, 3)\n", "lora_percentage = round(used_memory_for_lora/max_memory*100, 3)\n", "print(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\n", "print(f\"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.\")\n", "print(f\"Peak reserved memory = {used_memory} GB.\")\n", "print(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\n", "print(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\n", "print(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")" ] }, { "cell_type": "markdown", "metadata": { "jp-MarkdownHeadingCollapsed": true }, "source": [ "# Save Model\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "jp-MarkdownHeadingCollapsed": true }, "outputs": [], "source": [ "# model.save_pretrained(\"full_new_model\") \n", "# tokenizer.save_pretrained(\"full_new_model\")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# model.push_to_hub(\"Chord-Llama/Llama-3-chord-llama-realFullModel\", token = '') # Online saving\n", "# tokenizer.push_to_hub(\"Chord-Llama/Llama-3-chord-llama-realFullModel\", token = '') # Online saving\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# model.push_to_hub_merged(\"Chord-Llama/Llama-3-chord-llama-fullModel\", tokenizer, save_method = \"merged_16bit\", token =)\n", "# tokenizer.push_to_hub_merged(\"Chord-Llama/Llama-3-chord-llama-fullModel\", tokenizer, save_method = \"merged_16bit\", token =)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9479a173e4cd401d866f66c3b967947e", "version_major": 2, "version_minor": 0 }, "text/plain": [ "README.md: 0%| | 0.00/576 [00:00.\n", "You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. 
This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565\n", "You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers\n", "Unsloth 2024.4 patched 22 layers with 22 QKV layers, 22 O layers and 22 MLP layers.\n" ] } ], "source": [ "import time\n", "from transformers import TextStreamer\n", "from unsloth import FastLanguageModel\n", "\n", "model_or_checkpoint = 'tiny_llama_newData'\n", "model, tokenizer = FastLanguageModel.from_pretrained(\n", " model_name = model_or_checkpoint, \n", " max_seq_length = 2048,\n", " dtype = None,\n", " load_in_4bit = True,\n", ")\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "alpaca_prompt = \"\"\"\n", "\n", "### Instruction:\n", "{}\n", "\n", "### Input:\n", "{}\n", "\n", "### Response:\n", "{}\"\"\"\n", "\n", "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n", "def formatting_prompts_func(examples):\n", " instructions = examples[\"instruction\"]\n", " inputs = examples[\"input\"]\n", " outputs = examples[\"output\"]\n", " texts = []\n", " for instruction, input, output in zip(instructions, inputs, outputs):\n", " # Must add EOS_TOKEN, otherwise your generation will go on forever!\n", " text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n", " texts.append(text)\n", " return { \"text\" : texts, }\n", "pass\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "ins = \"\"\"divisions: '480'\n", "key:\n", " fifths: '-3'\n", " mode: major\n", "time:\n", " beats: '4'\n", " beat-type: '4'\n", "clef:\n", " sign: G\n", " line: '2'\n", "\"\"\"" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "inp = \"\"\"- a00_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", " a01_harmony:\n", " root:\n", " root-step: E\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a02_note:\n", " pitch:\n", " step: E\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: A\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a01_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a02_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: B\n", " alter: '-1'\n", " octave: '3'\n", " duration: '960'\n", " type: half\n", " a01_harmony:\n", " root:\n", " root-step: A\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a02_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", "- a00_harmony:\n", " root:\n", " root-step: E\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a01_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", " a02_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: A\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: 
quarter\n", " a01_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a02_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_harmony:\n", " root:\n", " root-step: A\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a01_note:\n", " pitch:\n", " step: E\n", " alter: '-1'\n", " octave: '4'\n", " duration: '1920'\n", " tie:\n", " '@type': start\n", " type: whole\n", "- a00_note:\n", " pitch:\n", " step: E\n", " alter: '-1'\n", " octave: '4'\n", " duration: '1920'\n", " tie:\n", " '@type': stop\n", " type: whole\n", "- a00_harmony:\n", " root:\n", " root-step: F\n", " kind:\n", " '@text': m\n", " '#text': minor\n", " a01_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", " a02_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: A\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a01_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a02_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: E\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_harmony:\n", " root:\n", " root-step: B\n", " root-alter: '-1'\n", " kind:\n", " '@text': '7'\n", " '#text': dominant\n", " a01_note:\n", " pitch:\n", " step: D\n", " octave: '4'\n", " duration: '1920'\n", " tie:\n", " '@type': start\n", " type: whole\n", "\"\"\"" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " \n", "\n", "### Instruction:\n", "divisions: '480'\n", "key:\n", " fifths: '-3'\n", " mode: major\n", "time:\n", " beats: '4'\n", " beat-type: '4'\n", "clef:\n", " sign: G\n", " line: '2'\n", "\n", "\n", "### Input:\n", "- a00_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", " a01_harmony:\n", " root:\n", " root-step: E\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a02_note:\n", " pitch:\n", " step: E\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: A\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a01_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a02_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: B\n", " alter: '-1'\n", " octave: '3'\n", " duration: '960'\n", " type: half\n", " a01_harmony:\n", " root:\n", " root-step: A\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a02_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", "- a00_harmony:\n", " root:\n", " root-step: E\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a01_note:\n", " rest: null\n", " 
duration: '960'\n", " type: half\n", " a02_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: A\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a01_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a02_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_harmony:\n", " root:\n", " root-step: A\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a01_note:\n", " pitch:\n", " step: E\n", " alter: '-1'\n", " octave: '4'\n", " duration: '1920'\n", " tie:\n", " '@type': start\n", " type: whole\n", "- a00_note:\n", " pitch:\n", " step: E\n", " alter: '-1'\n", " octave: '4'\n", " duration: '1920'\n", " tie:\n", " '@type': stop\n", " type: whole\n", "- a00_harmony:\n", " root:\n", " root-step: F\n", " kind:\n", " '@text': m\n", " '#text': minor\n", " a01_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", " a02_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: A\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a01_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a02_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: E\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_harmony:\n", " root:\n", " root-step: B\n", " root-alter: '-1'\n", " kind:\n", " '@text': '7'\n", " '#text': dominant\n", " a01_note:\n", " pitch:\n", " step: D\n", " octave: '4'\n", " duration: '1920'\n", " tie:\n", " '@type': start\n", " type: whole\n", "\n", "\n", "### Response:\n", "- a00_note:\n", " pitch:\n", " step: D\n", " octave: '4'\n", " duration: '1920'\n", " tie:\n", " '@type': stop\n", " type: whole\n", "- a00_harmony:\n", " root:\n", " root-step: E\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a01_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", " a02_note:\n", " pitch:\n", " step: E\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: A\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a01_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a02_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: B\n", " alter: '-1'\n", " octave: '3'\n", " duration: '960'\n", " type: half\n", " a01_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", "- a00_harmony:\n", " root:\n", " root-step: A\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a01_note:\n", " rest: 
null\n", "Generated in 19.536226272583008 seconds.\n" ] } ], "source": [ "# alpaca_prompt = Copied from above\n", "FastLanguageModel.for_inference(model) # Enable native 2x faster inference\n", "inputs = tokenizer(\n", "[\n", " alpaca_prompt.format( ins, inp,\n", " \"\", # output - leave this blank for generation!\n", " )\n", "], return_tensors = \"pt\").to(\"cuda\")\n", "\n", "start_time = time.time()\n", "text_streamer = TextStreamer(tokenizer)\n", "_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 500)\n", "print(f\"Generated in {time.time() - start_time} seconds.\")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": { "jp-MarkdownHeadingCollapsed": true }, "source": [ "# Pipeline Toy Example" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import torch\n", "import time\n", "from transformers import (\n", " AutoModelForCausalLM,\n", " AutoTokenizer,\n", " BitsAndBytesConfig,\n", " HfArgumentParser,\n", " TrainingArguments,\n", " pipeline,\n", " logging,\n", ")" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# reference: https://colab.research.google.com/drive/134o_cXcMe_lsvl15ZE_4Y75Kstepsntu?usp=sharing\n", "def load_model(model_name):\n", " # Load tokenizer and model with QLoRA configuration\n", " compute_dtype = getattr(torch, \"float16\")\n", "\n", " bnb_config = BitsAndBytesConfig(\n", " load_in_4bit=True,\n", " bnb_4bit_quant_type=\"float16\",\n", " bnb_4bit_compute_dtype=compute_dtype,\n", " bnb_4bit_use_double_quant=False,\n", " )\n", "\n", " if compute_dtype == torch.float16:\n", " major, _ = torch.cuda.get_device_capability()\n", " if major >= 8:\n", " print(\"=\" * 80)\n", " print(\"Your GPU supports bfloat16, you can accelerate training with the argument --bf16\")\n", " print(\"=\" * 80)\n", "\n", " model = AutoModelForCausalLM.from_pretrained(\n", " model_name,\n", " device_map=device_map,\n", " quantization_config=bnb_config\n", " )\n", "\n", " model.config.use_cache = False\n", " model.config.pretraining_tp = 1\n", "\n", " # Load LoRA configuration\n", " peft_config = LoraConfig(\n", " lora_alpha=32,\n", " lora_dropout=0,\n", " r=32,\n", " bias=\"none\",\n", " task_type=\"CAUSAL_LM\",\n", " )\n", "\n", " # Load Tokenizer\n", " tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n", " tokenizer.pad_token = tokenizer.eos_token\n", " tokenizer.padding_side = \"right\"\n", "\n", " return model, tokenizer, peft_config" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# reference: https://colab.research.google.com/drive/134o_cXcMe_lsvl15ZE_4Y75Kstepsntu?usp=sharing#scrollTo=XK4lTwqFflzE\n", " \n", "def text_gen_eval_wrapper(model, tokenizer, prompt, model_id=1, show_metrics=True, temp=0.7, max_new_tokens=200):\n", "\n", " # Initialize the pipeline\n", " pipe = pipeline(task=\"text-generation\",\n", " model=model,\n", " tokenizer=tokenizer,\n", " max_new_tokens=max_new_tokens,\n", " do_sample=True,\n", " temperature=temp)\n", "\n", " # Generate text using the pipeline\n", " pipe = pipeline(task=\"text-generation\",\n", " model=model,\n", " tokenizer=tokenizer,\n", " max_new_tokens=max_new_tokens)\n", " result = pipe(prompt)\n", " generated_text = result[0]['generated_text']\n", "\n", "\n", " index = generated_text.find(\"### Response:\")\n", " if index != -1:\n", " \n", " substring_after_assistant = 
generated_text[index + len(\"### Response:\"):].strip()\n", " else:\n", "\n", " substring_after_assistant = generated_text.strip()\n", "\n", " return substring_after_assistant" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Unused kwargs: ['quant_method']. These kwargs are not used in .\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "================================================================================\n", "Your GPU supports bfloat16, you can accelerate training with the argument --bf16\n", "================================================================================\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/martin/miniconda3/envs/unsloth_env/lib/python3.10/site-packages/transformers/quantizers/auto.py:159: UserWarning: You passed `quantization_config` or equivalent parameters to `from_pretrained` but the model you're loading already has a `quantization_config` attribute. The `quantization_config` from the model will be used.\n", " warnings.warn(warning_msg)\n" ] } ], "source": [ "from peft import LoraConfig\n", "device_map = {\"\": 0}\n", "model, tokenizer, peft_config = load_model('tiny_llama_newData')" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "alpaca_prompt = \"\"\"\n", "\n", "### Instruction:\n", "{}\n", "\n", "### Input:\n", "{}\n", "\n", "### Response:\n", "{}\"\"\"\n", "\n", "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n", "def formatting_prompts_func(examples):\n", " instructions = examples[\"instruction\"]\n", " inputs = examples[\"input\"]\n", " outputs = examples[\"output\"]\n", " texts = []\n", " for instruction, input, output in zip(instructions, inputs, outputs):\n", " # Must add EOS_TOKEN, otherwise your generation will go on forever!\n", " text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n", " texts.append(text)\n", " return { \"text\" : texts, }\n", "pass\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "ins = \"\"\"divisions: '480'\n", "key:\n", " fifths: '-3'\n", " mode: major\n", "time:\n", " beats: '4'\n", " beat-type: '4'\n", "clef:\n", " sign: G\n", " line: '2'\n", "\"\"\"" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "inp = \"\"\"- a00_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", " a01_harmony:\n", " root:\n", " root-step: E\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a02_note:\n", " pitch:\n", " step: E\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: A\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a01_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a02_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: B\n", " alter: '-1'\n", " octave: '3'\n", " duration: '960'\n", " type: half\n", " a01_harmony:\n", " root:\n", " root-step: A\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a02_note:\n", " rest: null\n", " duration: 
'960'\n", " type: half\n", "- a00_harmony:\n", " root:\n", " root-step: E\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a01_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", " a02_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: A\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a01_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a02_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_harmony:\n", " root:\n", " root-step: A\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a01_note:\n", " pitch:\n", " step: E\n", " alter: '-1'\n", " octave: '4'\n", " duration: '1920'\n", " tie:\n", " '@type': start\n", " type: whole\n", "- a00_note:\n", " pitch:\n", " step: E\n", " alter: '-1'\n", " octave: '4'\n", " duration: '1920'\n", " tie:\n", " '@type': stop\n", " type: whole\n", "- a00_harmony:\n", " root:\n", " root-step: F\n", " kind:\n", " '@text': m\n", " '#text': minor\n", " a01_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", " a02_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: A\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a01_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a02_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: E\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_harmony:\n", " root:\n", " root-step: B\n", " root-alter: '-1'\n", " kind:\n", " '@text': '7'\n", " '#text': dominant\n", " a01_note:\n", " pitch:\n", " step: D\n", " octave: '4'\n", " duration: '1920'\n", " tie:\n", " '@type': start\n", " type: whole\n", "\"\"\"" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\"\\n\\n### Instruction:\\ndivisions: '480'\\nkey:\\n fifths: '-3'\\n mode: major\\ntime:\\n beats: '4'\\n beat-type: '4'\\nclef:\\n sign: G\\n line: '2'\\n\\n\\n### Input:\\n- a00_note:\\n rest: null\\n duration: '960'\\n type: half\\n a01_harmony:\\n root:\\n root-step: E\\n root-alter: '-1'\\n kind:\\n '@text': ''\\n '#text': major\\n a02_note:\\n pitch:\\n step: E\\n alter: '-1'\\n octave: '4'\\n duration: '480'\\n type: quarter\\n a03_note:\\n pitch:\\n step: G\\n octave: '4'\\n duration: '480'\\n type: quarter\\n- a00_note:\\n pitch:\\n step: A\\n alter: '-1'\\n octave: '4'\\n duration: '480'\\n type: quarter\\n a01_note:\\n pitch:\\n step: G\\n octave: '4'\\n duration: '480'\\n type: quarter\\n a02_note:\\n pitch:\\n step: F\\n octave: '4'\\n duration: '480'\\n type: quarter\\n a03_note:\\n pitch:\\n step: G\\n octave: '4'\\n duration: '480'\\n type: quarter\\n- a00_note:\\n pitch:\\n step: B\\n alter: '-1'\\n octave: '3'\\n duration: '960'\\n type: half\\n a01_harmony:\\n root:\\n root-step: A\\n root-alter: '-1'\\n kind:\\n 
'@text': ''\\n '#text': major\\n a02_note:\\n rest: null\\n duration: '960'\\n type: half\\n- a00_harmony:\\n root:\\n root-step: E\\n root-alter: '-1'\\n kind:\\n '@text': ''\\n '#text': major\\n a01_note:\\n rest: null\\n duration: '960'\\n type: half\\n a02_note:\\n pitch:\\n step: G\\n octave: '4'\\n duration: '480'\\n type: quarter\\n a03_note:\\n pitch:\\n step: G\\n octave: '4'\\n duration: '480'\\n type: quarter\\n- a00_note:\\n pitch:\\n step: A\\n alter: '-1'\\n octave: '4'\\n duration: '480'\\n type: quarter\\n a01_note:\\n pitch:\\n step: G\\n octave: '4'\\n duration: '480'\\n type: quarter\\n a02_note:\\n pitch:\\n step: G\\n octave: '4'\\n duration: '480'\\n type: quarter\\n a03_note:\\n pitch:\\n step: F\\n octave: '4'\\n duration: '480'\\n type: quarter\\n- a00_harmony:\\n root:\\n root-step: A\\n root-alter: '-1'\\n kind:\\n '@text': ''\\n '#text': major\\n a01_note:\\n pitch:\\n step: E\\n alter: '-1'\\n octave: '4'\\n duration: '1920'\\n tie:\\n '@type': start\\n type: whole\\n- a00_note:\\n pitch:\\n step: E\\n alter: '-1'\\n octave: '4'\\n duration: '1920'\\n tie:\\n '@type': stop\\n type: whole\\n- a00_harmony:\\n root:\\n root-step: F\\n kind:\\n '@text': m\\n '#text': minor\\n a01_note:\\n rest: null\\n duration: '960'\\n type: half\\n a02_note:\\n pitch:\\n step: F\\n octave: '4'\\n duration: '480'\\n type: quarter\\n a03_note:\\n pitch:\\n step: G\\n octave: '4'\\n duration: '480'\\n type: quarter\\n- a00_note:\\n pitch:\\n step: A\\n alter: '-1'\\n octave: '4'\\n duration: '480'\\n type: quarter\\n a01_note:\\n pitch:\\n step: F\\n octave: '4'\\n duration: '480'\\n type: quarter\\n a02_note:\\n pitch:\\n step: F\\n octave: '4'\\n duration: '480'\\n type: quarter\\n a03_note:\\n pitch:\\n step: E\\n alter: '-1'\\n octave: '4'\\n duration: '480'\\n type: quarter\\n- a00_harmony:\\n root:\\n root-step: B\\n root-alter: '-1'\\n kind:\\n '@text': '7'\\n '#text': dominant\\n a01_note:\\n pitch:\\n step: D\\n octave: '4'\\n duration: '1920'\\n tie:\\n '@type': start\\n type: whole\\n\\n\\n### Response:\\n\"" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "message = alpaca_prompt.format( ins, inp,\n", " \"\", # output - leave this blank for generation!\n", " )\n", "message" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Generated in 39.8182430267334 seconds.\n", "- a00_note:\n", " pitch:\n", " step: D\n", " octave: '4'\n", " duration: '1920'\n", " tie:\n", " '@type': stop\n", " type: whole\n", "- a00_harmony:\n", " root:\n", " root-step: E\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a01_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", " a02_note:\n", " pitch:\n", " step: E\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: A\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a01_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a02_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: B\n", " alter: '-1'\n", " octave: '3'\n", " duration: '960'\n", " type: 
half\n", " a01_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", "- a00_harmony:\n", " root:\n", " root-step: A\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a01_note:\n", " rest: null\n" ] } ], "source": [ "start_time = time.time()\n", "generated_text = text_gen_eval_wrapper(model, tokenizer, message, show_metrics=False, temp=.1, max_new_tokens=500)\n", "print(f\"Generated in {time.time() - start_time} seconds.\")\n", "print(generated_text)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Generated in 39.7313814163208 seconds.\n", "- a00_note:\n", " pitch:\n", " step: D\n", " octave: '4'\n", " duration: '1920'\n", " tie:\n", " '@type': stop\n", " type: whole\n", "- a00_harmony:\n", " root:\n", " root-step: E\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a01_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", " a02_note:\n", " pitch:\n", " step: E\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: A\n", " alter: '-1'\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a01_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a02_note:\n", " pitch:\n", " step: F\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", " a03_note:\n", " pitch:\n", " step: G\n", " octave: '4'\n", " duration: '480'\n", " type: quarter\n", "- a00_note:\n", " pitch:\n", " step: B\n", " alter: '-1'\n", " octave: '3'\n", " duration: '960'\n", " type: half\n", " a01_note:\n", " rest: null\n", " duration: '960'\n", " type: half\n", "- a00_harmony:\n", " root:\n", " root-step: A\n", " root-alter: '-1'\n", " kind:\n", " '@text': ''\n", " '#text': major\n", " a01_note:\n", " rest: null\n" ] } ], "source": [ "start_time = time.time()\n", "generated_text = text_gen_eval_wrapper(model, tokenizer, message, show_metrics=False, max_new_tokens=500)\n", "print(f\"Generated in {time.time() - start_time} seconds.\")\n", "print(generated_text)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "accelerator": "GPU", "colab": { "gpuType": "T4", "provenance": [] }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.14" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "014479d51b95488da6dbdbfa3e7adce3": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", 
"description_tooltip": null, "layout": "IPY_MODEL_1b01f784c04e4d11a2afd72284646138", "placeholder": "​", "style": "IPY_MODEL_5f7db89c6f0b4c968dd8f43e23f54365", "value": "Map (num_proc=2): 100%" } }, "08a476cadd42469eb2234ac503f8447e": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "09a4c26458f24fc49aadae2fee60acc4": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "0c2631134e6248459fdfe8f48205df25": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "0df7c15449ed45edb9094e5d58d0731d": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "0ec293e756104409b563d3e561ec2937": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": 
"@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "0f36928c39f641618e89a27445c5cbc2": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_7d03bcf25c774fed864b09770cfe74ea", "placeholder": "​", "style": "IPY_MODEL_09a4c26458f24fc49aadae2fee60acc4", "value": "special_tokens_map.json: 100%" } }, "10c75c3affac4c839a065c5d07ff7fd3": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_7d9adc9aaa6540c3b80fb2b4e02f7b66", "IPY_MODEL_2e24b601930a44b58d5d1f53b33315f7", "IPY_MODEL_bb0f6ed8b0314e119254b574de524f6f" ], "layout": "IPY_MODEL_355b4205fa0e4612954dc918c9cc6449" } }, "126fe7d2d8cd454eb7f71bec0b9b6e01": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f753790028d14420b2683c2bc16dba2b", "max": 121, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_b11371e3382440219af0d82b4fae0fe8", "value": 121 } }, "15248beee1f042f29ba17deb3e29c90b": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_396e74209ce9460a885b2236d1fb7b97", "placeholder": "​", "style": "IPY_MODEL_a8ec27381f564d7cb6962ee503d2eae6", "value": "tokenizer.json: 100%" } }, "15f5446cc9d0433281001eb149c51cdd": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, 
"grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "161dc482ea1f4f129f355ee4d20e8cd5": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_15f5446cc9d0433281001eb149c51cdd", "placeholder": "​", "style": "IPY_MODEL_f27dfff3b0534f558d91747a91629523", "value": "Downloading readme: 100%" } }, "1a4b589eed4f4d9694cae1f69330836a": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "1b01f784c04e4d11a2afd72284646138": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "1bf51ccd8640436482357b6e06a2f850": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": 
"@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_8ce3a2df43bf48429e29bd73dac5e638", "placeholder": "​", "style": "IPY_MODEL_db00b79287da4c93b425d4b07b0f7b72", "value": " 11.6k/11.6k [00:00<00:00, 475kB/s]" } }, "1ea78447b4fc4a38aaff65f7954ae65e": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "1eef300d637145d68a660db46ca9b14a": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "208f49c8a05f4f25906fb7df2fd95d7c": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_dd7c9e9d3fca4f0aa6893a48c92e6748", "max": 11610, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_606650950feb431e825cb7ea79c5a1d7", "value": 11610 } }, "2c95bc4f3f9d42be8f03bef23242af66": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", 
"state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "2d0462c3fd21481fa6adf8fc9df5c28f": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_15248beee1f042f29ba17deb3e29c90b", "IPY_MODEL_c75a509df90340e5857692400bbb8af8", "IPY_MODEL_e7611888e9fb47588aee414cdf0ecf27" ], "layout": "IPY_MODEL_2c95bc4f3f9d42be8f03bef23242af66" } }, "2e24b601930a44b58d5d1f53b33315f7": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_4bba5aefb61e4b458381bff09abbe96a", "max": 44307561, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_6f37c03d14314e59bef4ff1bc985e866", "value": 44307561 } }, "3018574e40ef4f5284a1832d2efb1121": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "30f2bb04d6754a54af59439cef40ff87": { "model_module": "@jupyter-widgets/base", 
"model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "320ad99d9e91463986ca91073e3729fa": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_08a476cadd42469eb2234ac503f8447e", "max": 51760, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_8019cfd942a0477e92cc3a34ecc826e6", "value": 51760 } }, "348691f1aa714d16bfc3148b39518b36": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "34f78b3e7ea0400b90d48fb2809a654d": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "355b4205fa0e4612954dc918c9cc6449": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, 
"min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "396e74209ce9460a885b2236d1fb7b97": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3b9a0019602a42cd8bbd80a77be5cfcd": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3cfc4b6de45f4ff5b8f23c3b79e6503f": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_d091d3f8b01a473ea81f5f62567a6407", "placeholder": "​", "style": "IPY_MODEL_973e0a9b040f494895f60ba8e1468c6e", "value": " 121/121 [00:00<00:00, 4.86kB/s]" } }, "3ed36820b9244e878c6610c589d7d821": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", 
"description": "", "description_tooltip": null, "layout": "IPY_MODEL_5557222f4157411d9250ad82827a2f8f", "placeholder": "​", "style": "IPY_MODEL_b8d26c1ec5c44e1f9c8d20bb6d4c9d1e", "value": " 50.9k/50.9k [00:00<00:00, 1.20MB/s]" } }, "41eaa431bdd94075b0c41899c14c3fc3": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "451a5048b6f84360aacbda342448e0da": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_488bb3579b99411ea03d25c0e916e629", "max": 1140, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_ab53150e33004a41ab28c06833ed5aa0", "value": 1140 } }, "45560e601cdb484c8e9293dece7555db": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_9dd2481401cc43acb405a64e9ce9f13f", "IPY_MODEL_772124f5d8f54e959dff5d56d0335e4c", "IPY_MODEL_9e3f29ee68524e749a3dbf1ba32653b6" ], "layout": "IPY_MODEL_fd8ffc5653b94428bb3b82d3979ef54c" } }, "467bd804fb244a298b8caaf0c3b3a3d4": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, 
"max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "469b8be78b924d3aa90335798775e1c6": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "488bb3579b99411ea03d25c0e916e629": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "4bba5aefb61e4b458381bff09abbe96a": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "4bf5cf0af2e441bf8d101795f8a82594": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "50bd0e7f67f94c9eab8f42493332041c": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", 
"model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "526e11b93f064ace9b6a255da796fdd7": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "5326096097064f4aa0650ca59ead43aa": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "5480935a6ba341bbb62cbfb7d8707fec": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", 
"_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "5557222f4157411d9250ad82827a2f8f": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "5b40ea812830452c8bdc46dbc0dda54f": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "5b7210bdfdd547abb29abbbb26dce37e": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_adc97fcb04834c7b9417597d9de135cd", "IPY_MODEL_f5b2f797ddae451e8fc1dca9bece278c", "IPY_MODEL_6bf7fcaf8dc347daaf2f5b04fac24300" ], "layout": "IPY_MODEL_3018574e40ef4f5284a1832d2efb1121" } }, "5c64d6e41ddc46da81e4f92f4c88e6d1": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null 
} }, "5e1d1523a0c34285a6ef4aa13e9cc645": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "5f7db89c6f0b4c968dd8f43e23f54365": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "5fc82503d5984fc0bfe364b2ae5ad530": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "6038bb8ebaf945da8e8c470fa7d436ec": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "606650950feb431e825cb7ea79c5a1d7": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", 
"_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "60d93f53511c4fc1adf11bacf22590cc": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "66f169549d2847e097cb9788e3e4d29c": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_a3709b260001400e85db6464542c66d3", "IPY_MODEL_7d7726f17a4a462a871f5a97b3d66928", "IPY_MODEL_3ed36820b9244e878c6610c589d7d821" ], "layout": "IPY_MODEL_eb07173547f14d428d8549e88b4f9564" } }, "6bf7fcaf8dc347daaf2f5b04fac24300": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_8a372927fe8d4a09b4e35bdfc77a6136", "placeholder": "​", "style": "IPY_MODEL_4bf5cf0af2e441bf8d101795f8a82594", "value": " 51760/0 [00:00<00:00, 126317.04 examples/s]" } }, "6f37c03d14314e59bef4ff1bc985e866": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "72072eb832db4889beef6f3fa5d45044": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_467bd804fb244a298b8caaf0c3b3a3d4", "placeholder": "​", "style": "IPY_MODEL_5480935a6ba341bbb62cbfb7d8707fec", "value": " 5.70G/5.70G [02:23<00:00, 25.8MB/s]" } }, "772124f5d8f54e959dff5d56d0335e4c": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_3b9a0019602a42cd8bbd80a77be5cfcd", "max": 51760, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_5fc82503d5984fc0bfe364b2ae5ad530", 
"value": 51760 } }, "7734e446cfbd4e9b9f796ba662f2ab71": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "7d03bcf25c774fed864b09770cfe74ea": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "7d7726f17a4a462a871f5a97b3d66928": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_5c64d6e41ddc46da81e4f92f4c88e6d1", "max": 50941, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_b46d2111e0ee41b9b112f7c9fc65028f", "value": 50941 } }, "7d9adc9aaa6540c3b80fb2b4e02f7b66": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f9e8a5f5102040f380d3287a93cfa045", "placeholder": "​", "style": "IPY_MODEL_ab3e06fd917e4cc59f5b8e275caba930", "value": "Downloading data: 100%" } }, "8019cfd942a0477e92cc3a34ecc826e6": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "84569f44a689473889246c2eeab5d8a1": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], 
"_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_5e1d1523a0c34285a6ef4aa13e9cc645", "placeholder": "​", "style": "IPY_MODEL_e4ff98a12bc1424dadd9a8dec3afb2b3", "value": "model.safetensors: 100%" } }, "8516663d35be4bf48ce4c54f58188075": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "85ee709c047c49c7b7888caf9eff3a54": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_84569f44a689473889246c2eeab5d8a1", "IPY_MODEL_999ed6e6f8b24c5bbfd582c478f45c57", "IPY_MODEL_72072eb832db4889beef6f3fa5d45044" ], "layout": "IPY_MODEL_b87419aaa7cf4ed48b96ebce680bb29b" } }, "8a372927fe8d4a09b4e35bdfc77a6136": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "8ce3a2df43bf48429e29bd73dac5e638": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": 
"@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "92eb4d8be9a64892ae5e92c809f568d0": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "96f9b00a55f34d4c8a4ed3c496e1ffbf": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_0f36928c39f641618e89a27445c5cbc2", "IPY_MODEL_d4e450ceb7da45e9b8ae82bec8295343", "IPY_MODEL_b997bc55dca04d0f8880447ec9f9ba31" ], "layout": "IPY_MODEL_b1a3d509b7e643fca992dd838bacc203" } }, "973e0a9b040f494895f60ba8e1468c6e": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "980076d4a6c44020aed6d19ebfaaaa54": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", 
"_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_1a4b589eed4f4d9694cae1f69330836a", "placeholder": "​", "style": "IPY_MODEL_a419b86d8ff0428b88843b55cf0be657", "value": " 1.14k/1.14k [00:00<00:00, 74.0kB/s]" } }, "999ed6e6f8b24c5bbfd582c478f45c57": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_92eb4d8be9a64892ae5e92c809f568d0", "max": 5702746405, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_5b40ea812830452c8bdc46dbc0dda54f", "value": 5702746405 } }, "9a65ad628ffc4e2d87479514be357617": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_a98617118be94b73bbd2043c5dd65621", "IPY_MODEL_126fe7d2d8cd454eb7f71bec0b9b6e01", "IPY_MODEL_3cfc4b6de45f4ff5b8f23c3b79e6503f" ], "layout": "IPY_MODEL_f50c67dc31dc48aa978909d8967c5294" } }, "9b3a7554fbf14276b208fce9d24b5b89": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9dd2481401cc43acb405a64e9ce9f13f": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e288762221be4e0796631bb7b41bea56", "placeholder": "​", "style": "IPY_MODEL_f94493a870af47c98ded7a71e1614681", "value": "Map: 100%" } }, "9e3f29ee68524e749a3dbf1ba32653b6": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], 
"_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9b3a7554fbf14276b208fce9d24b5b89", "placeholder": "​", "style": "IPY_MODEL_c8491efcfb72456f8d629acc49dfac90", "value": " 51760/51760 [00:00<00:00, 80588.64 examples/s]" } }, "a3709b260001400e85db6464542c66d3": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_50bd0e7f67f94c9eab8f42493332041c", "placeholder": "​", "style": "IPY_MODEL_b41fbbe7b59b4e13b17244edba47af30", "value": "tokenizer_config.json: 100%" } }, "a419b86d8ff0428b88843b55cf0be657": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "a8ec27381f564d7cb6962ee503d2eae6": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "a96216197bda4d73afdfd657567bd059": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a98617118be94b73bbd2043c5dd65621": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", 
"description_tooltip": null, "layout": "IPY_MODEL_5326096097064f4aa0650ca59ead43aa", "placeholder": "​", "style": "IPY_MODEL_34f78b3e7ea0400b90d48fb2809a654d", "value": "generation_config.json: 100%" } }, "ab3e06fd917e4cc59f5b8e275caba930": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "ab53150e33004a41ab28c06833ed5aa0": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "acb68ec0ac33408b8d02f22cfbbe4f01": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_8516663d35be4bf48ce4c54f58188075", "placeholder": "​", "style": "IPY_MODEL_60d93f53511c4fc1adf11bacf22590cc", "value": " 51760/51760 [00:53<00:00, 1849.32 examples/s]" } }, "adc97fcb04834c7b9417597d9de135cd": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_6038bb8ebaf945da8e8c470fa7d436ec", "placeholder": "​", "style": "IPY_MODEL_c9b628fa9ce34162b6d04d4f191f819d", "value": "Generating train split: " } }, "b11371e3382440219af0d82b4fae0fe8": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "b1a3d509b7e643fca992dd838bacc203": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": 
null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b41fbbe7b59b4e13b17244edba47af30": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "b46d2111e0ee41b9b112f7c9fc65028f": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "b869fd94c21146b4917ff3e60654664b": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b87419aaa7cf4ed48b96ebce680bb29b": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": 
null, "width": null } }, "b8d26c1ec5c44e1f9c8d20bb6d4c9d1e": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "b997bc55dca04d0f8880447ec9f9ba31": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a96216197bda4d73afdfd657567bd059", "placeholder": "​", "style": "IPY_MODEL_db420165996143ecb8b6624ef6341912", "value": " 301/301 [00:00<00:00, 6.61kB/s]" } }, "bb0f6ed8b0314e119254b574de524f6f": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_0c2631134e6248459fdfe8f48205df25", "placeholder": "​", "style": "IPY_MODEL_0ec293e756104409b563d3e561ec2937", "value": " 44.3M/44.3M [00:01<00:00, 46.8MB/s]" } }, "bbea39204f514d65a92dbe79e4d8e697": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_161dc482ea1f4f129f355ee4d20e8cd5", "IPY_MODEL_208f49c8a05f4f25906fb7df2fd95d7c", "IPY_MODEL_1bf51ccd8640436482357b6e06a2f850" ], "layout": "IPY_MODEL_1ea78447b4fc4a38aaff65f7954ae65e" } }, "c75a509df90340e5857692400bbb8af8": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_c7fe113e1b3b46968d97cdc6fb1dc241", "max": 9084463, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_fcd3a5f67e164712bcf7e8e7ff1f770a", "value": 9084463 } }, "c78e69db216844a39fac827349845f81": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_f2c23b7309ec430c9cb984ba8f511302", 
"IPY_MODEL_451a5048b6f84360aacbda342448e0da", "IPY_MODEL_980076d4a6c44020aed6d19ebfaaaa54" ], "layout": "IPY_MODEL_b869fd94c21146b4917ff3e60654664b" } }, "c7fe113e1b3b46968d97cdc6fb1dc241": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "c8491efcfb72456f8d629acc49dfac90": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "c9b628fa9ce34162b6d04d4f191f819d": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "d091d3f8b01a473ea81f5f62567a6407": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "d4e450ceb7da45e9b8ae82bec8295343": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", 
"_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_41eaa431bdd94075b0c41899c14c3fc3", "max": 301, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_7734e446cfbd4e9b9f796ba662f2ab71", "value": 301 } }, "d92aeeaaa8b74016bfcbe49cec44f0d7": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": "20px" } }, "db00b79287da4c93b425d4b07b0f7b72": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "db420165996143ecb8b6624ef6341912": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "dd7c9e9d3fca4f0aa6893a48c92e6748": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": 
null, "top": null, "visibility": null, "width": null } }, "e288762221be4e0796631bb7b41bea56": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e4ff98a12bc1424dadd9a8dec3afb2b3": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "e7611888e9fb47588aee414cdf0ecf27": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_1eef300d637145d68a660db46ca9b14a", "placeholder": "​", "style": "IPY_MODEL_469b8be78b924d3aa90335798775e1c6", "value": " 9.08M/9.08M [00:00<00:00, 15.6MB/s]" } }, "eb07173547f14d428d8549e88b4f9564": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f07f34e9b5c649469ac667239b0fbaad": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": 
"HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_014479d51b95488da6dbdbfa3e7adce3", "IPY_MODEL_320ad99d9e91463986ca91073e3729fa", "IPY_MODEL_acb68ec0ac33408b8d02f22cfbbe4f01" ], "layout": "IPY_MODEL_30f2bb04d6754a54af59439cef40ff87" } }, "f27dfff3b0534f558d91747a91629523": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "f2c23b7309ec430c9cb984ba8f511302": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_526e11b93f064ace9b6a255da796fdd7", "placeholder": "​", "style": "IPY_MODEL_0df7c15449ed45edb9094e5d58d0731d", "value": "config.json: 100%" } }, "f50c67dc31dc48aa978909d8967c5294": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f5b2f797ddae451e8fc1dca9bece278c": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_d92aeeaaa8b74016bfcbe49cec44f0d7", "max": 1, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_348691f1aa714d16bfc3148b39518b36", "value": 1 } }, "f753790028d14420b2683c2bc16dba2b": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": 
"@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f94493a870af47c98ded7a71e1614681": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "f9e8a5f5102040f380d3287a93cfa045": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "fcd3a5f67e164712bcf7e8e7ff1f770a": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "fd8ffc5653b94428bb3b82d3979ef54c": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, 
"grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } } } } }, "nbformat": 4, "nbformat_minor": 4 }