export_hf_checkpoint.py ADDED
@@ -0,0 +1,55 @@
+ import os
+
+ import torch
+ import transformers
+ from peft import PeftModel
+ from transformers import LlamaForCausalLM, LlamaTokenizer  # noqa: F402
+
+ BASE_MODEL = os.environ.get("BASE_MODEL", None)
+ assert (
+     BASE_MODEL
+ ), "Please specify a value for BASE_MODEL environment variable, e.g. `export BASE_MODEL=huggyllama/llama-7b`"  # noqa: E501
+
+ tokenizer = LlamaTokenizer.from_pretrained(BASE_MODEL)
+
+ base_model = LlamaForCausalLM.from_pretrained(
+     BASE_MODEL,
+     load_in_8bit=False,
+     torch_dtype=torch.float16,
+     device_map={"": "cpu"},
+ )
+
+ first_weight = base_model.model.layers[0].self_attn.q_proj.weight
+ first_weight_old = first_weight.clone()
+
+ lora_model = PeftModel.from_pretrained(
+     base_model,
+     "serpdotai/llama-oasst-lora-13B",
+     device_map={"": "cpu"},
+     torch_dtype=torch.float16,
+ )
+
+ lora_weight = lora_model.base_model.model.model.layers[
+     0
+ ].self_attn.q_proj.weight
+
+ assert torch.allclose(first_weight_old, first_weight)
+
+ # merge weights - new merging method from peft
+ lora_model = lora_model.merge_and_unload()
+
+ lora_model.train(False)
+
+ # did we do anything?
+ assert not torch.allclose(first_weight_old, first_weight)
+
+ lora_model_sd = lora_model.state_dict()
+ deloreanized_sd = {
+     k.replace("base_model.model.", ""): v
+     for k, v in lora_model_sd.items()
+     if "lora" not in k
+ }
+
+ LlamaForCausalLM.save_pretrained(
+     base_model, "./hf_ckpt", state_dict=deloreanized_sd, max_shard_size="400MB"
+ )
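For reference, a minimal sketch of how the checkpoint exported to ./hf_ckpt could be reloaded and sanity-checked after running the script above. This is not part of the committed file; note that the script only writes model weights, so the tokenizer is still loaded from the base model.

import os
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer

# Tokenizer comes from the base model (or from the tokenizer files added in this repo).
tokenizer = LlamaTokenizer.from_pretrained(os.environ["BASE_MODEL"])
model = LlamaForCausalLM.from_pretrained("./hf_ckpt", torch_dtype=torch.float16)
model.eval()

# Sanity check: the exported state dict should contain no LoRA tensors.
assert not any("lora" in k for k in model.state_dict())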
merge_percentage.py ADDED
@@ -0,0 +1,112 @@
+ print("Starting script, please wait...")
+
+ import torch
+ import shutil
+ import json
+ from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel
+ from tkinter.filedialog import askdirectory, askopenfilename
+
+ # Rubbish experiment by Concedo for KoboldAI usage
+ # Experimenting with the ability to blend weights from 2 LLMs of the same architecture
+ # Both models must have the same architecture, number of parameters, layer counts and types, and use the same vocab.
+
+ #mixer output settings
+ blend_ratio = 0.2 #weight applied to the FIRST model's parameters: 1.0 keeps the first model, 0.0 gives the second model
+ fp16 = False #perform operations in fp16. Saves memory, but CPU inference will not be possible.
+ always_output_fp16 = True #if true, will output fp16 even if operating in fp32
+ max_shard_size = "2000MiB" #set output shard size
+ verbose_info = True #will show model information when loading
+ force_cpu = True #only use cpu
+ load_sharded = True #load both models shard by shard
+
+ #test generation settings, only for fp32
+ deterministic_test = True #determines if outputs are always the same
+ test_prompt = "Test, " #test prompt for generation. only for fp32. set to empty string to skip generating.
+ test_max_length = 32 #test generation length
+
+
+ blend_ratio_b = 1.0 - blend_ratio
+
+ def get_model_info(model):
+     with torch.no_grad():
+         outfo = ""
+         cntent = 0
+         outfo += "\n==============================\n"
+         for name, para in model.named_parameters():
+             cntent += 1
+             outfo += ('{}: {}'.format(name, para.shape)) + "\n"
+         outfo += ("Num Entries: " + str(cntent)) + "\n"
+         outfo += ("==============================\n")
+         return outfo
+
+ def merge_models(model1, model2):
+     with torch.no_grad():
+         tensornum = 0
+         for p1, p2 in zip(model1.parameters(), model2.parameters()):
+             p1 *= blend_ratio
+             p2 *= blend_ratio_b
+             p1 += p2
+             #print(p1)
+             #print(p2)
+             tensornum += 1
+             if verbose_info:
+                 print("Merging tensor " + str(tensornum))
+             pass
+
+ def read_index_filenames(sourcedir):
+     index = json.load(open(sourcedir + '/pytorch_model.bin.index.json', 'rt'))
+     fl = []
+     for k, v in index['weight_map'].items():
+         if v not in fl:
+             fl.append(v)
+     return fl
+
+ print("Opening file dialog, please select FIRST model directory...")
+ model_path1 = askdirectory(title="Select Directory of FIRST model to merge")
+ print("Opening file dialog, please select SECOND model directory...")
+ model_path2 = askdirectory(title="Select Directory of SECOND model to merge")
+ print("Opening file dialog, please select OUTPUT model directory...")
+ model_path3 = askdirectory(title="Select Output Directory of merged model")
+ if not model_path1 or not model_path2:
+     print("\nYou must select two directories containing models to merge and one output directory. Exiting.")
+     exit()
+
+ with torch.no_grad():
+     if fp16:
+         torch.set_default_dtype(torch.float16)
+     else:
+         torch.set_default_dtype(torch.float32)
+
+     device = torch.device("cuda") if (torch.cuda.is_available() and not force_cpu) else torch.device("cpu")
+     print(device)
+
+
+     print("Loading Model 1...")
+     model1 = AutoModelForCausalLM.from_pretrained(model_path1, torch_dtype='auto') #,torch_dtype=torch.float16
+     model1 = model1.to(device)
+     model1.eval()
+     print("Model 1 Loaded. Dtype: " + str(model1.dtype))
+     print("Loading Model 2...")
+     model2 = AutoModelForCausalLM.from_pretrained(model_path2, torch_dtype='auto') #,torch_dtype=torch.float16
+     model2 = model2.to(device)
+     model2.eval()
+     print("Model 2 Loaded. Dtype: " + str(model2.dtype))
+
+     #collect layout info for both models; they must match exactly for a valid merge
+     m1_info = get_model_info(model1)
+     m2_info = get_model_info(model2)
+
+     print("Merging models...")
+     merge_models(model1, model2)
+
+     if model_path3:
+         print("Saving new model...")
+         newsavedpath = model_path3 + "/converted_model"
+         if always_output_fp16 and not fp16:
+             model1.half()
+         model1.save_pretrained(newsavedpath, max_shard_size=max_shard_size)
+         print("\nSaved to: " + newsavedpath)
+     else:
+         print("\nOutput model was not saved as no output path was selected.")
+
+     print("\nScript Completed.")
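A minimal sketch of the linear interpolation that merge_models performs, shown on plain tensors rather than full models; the tensor values are illustrative assumptions, not taken from any real checkpoint.

import torch

blend_ratio = 0.2                    # weight on the first model
blend_ratio_b = 1.0 - blend_ratio    # weight on the second model

p1 = torch.tensor([1.0, 2.0, 3.0])   # stand-in for a parameter of model 1
p2 = torch.tensor([5.0, 6.0, 7.0])   # matching parameter of model 2

merged = blend_ratio * p1 + blend_ratio_b * p2
print(merged)                        # tensor([4.2000, 5.2000, 6.2000])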
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+ size 499723
tokenizer_config.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "bos_token": {
+     "__type": "AddedToken",
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "clean_up_tokenization_spaces": false,
+   "eos_token": {
+     "__type": "AddedToken",
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "model_max_length": 2048,
+   "pad_token": null,
+   "sp_model_kwargs": {},
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": {
+     "__type": "AddedToken",
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
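A minimal sketch of loading the tokenizer files added in this commit and checking the settings declared above; "path/to/this/repo" is a placeholder for wherever the repo is downloaded, not a real path.

from transformers import LlamaTokenizer

tokenizer = LlamaTokenizer.from_pretrained("path/to/this/repo")

print(tokenizer.bos_token, tokenizer.eos_token, tokenizer.unk_token)  # <s> </s> <unk>
print(tokenizer.model_max_length)                                     # 2048
ids = tokenizer("Hello world").input_ids
print(ids[0] == tokenizer.bos_token_id)  # True, since add_bos_token is true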