dahara1 committed on
Commit
21bbaa7
1 Parent(s): bbf16b8

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +4 -9
README.md CHANGED
@@ -14,7 +14,6 @@ The following sample assumes that the setup on the above page has been completed
14
 
15
  This model has only been tested on RyzenAI for Windows 11. It does not work in Linux environments such as WSL.
16
 
17
-
18
 
19
  ### setup
20
  In a cmd window.
@@ -26,16 +25,12 @@ pip install transformers==4.43.3
26
  # Updating the Transformers library will cause the LLama 2 sample to stop working.
27
  # If you want to run LLama 2, revert to pip install transformers==4.34.0.
28
  pip install tokenizers==0.19.1
 
29
 
30
- git lfs install
31
- git clone https://huggingface.co/dahara1/llama3.1-8b-Instruct-amd-npu
32
- cd llama3.1-8b-Instruct-amd-npu
33
- git lfs pull
34
- cd ..
35
- copy <your_install_path>\RyzenAI-SW\example\transformers\models\llama2\modeling_llama_amd.py .
36
 
37
  # set up Runtime. see https://ryzenai.docs.amd.com/en/latest/runtime_setup.html
38
- set XLNX_VART_FIRMWARE=<your_install_path>\voe-4.0-win_amd64\1x4.xclbin
39
  set NUM_OF_DPU_RUNNERS=1
40
 
41
  # save below sample script as utf8 and llama-3.1-test.py
@@ -83,7 +78,7 @@ if __name__ == "__main__":
83
  torch.set_num_threads(4)
84
 
85
  tokenizer = AutoTokenizer.from_pretrained("llama3.1-8b-Instruct-amd-npu")
86
- ckpt = "llama3.1-8b-Instruct-amd-npu/llama3.1_8b_w_bit_4_awq_amd.pt"
87
  terminators = [
88
  tokenizer.eos_token_id,
89
  tokenizer.convert_tokens_to_ids("<|eot_id|>")
 
14
 
15
  This model has only been tested on RyzenAI for Windows 11. It does not work in Linux environments such as WSL.
16
 
 
17
 
18
  ### setup
19
  In a cmd window.
 
25
  # Updating the Transformers library will cause the LLama 2 sample to stop working.
26
  # If you want to run LLama 2, revert to pip install transformers==4.34.0.
27
  pip install tokenizers==0.19.1
28
+ pip install -U "huggingface_hub[cli]"
29
 
30
+ huggingface-cli download dahara1/llama3.1-8b-Instruct-amd-npu --revision main --local-dir llama3.1-8b-Instruct-amd-npu
 
 
 
 
 
31
 
32
  # set up Runtime. see https://ryzenai.docs.amd.com/en/latest/runtime_setup.html
33
+ set XLNX_VART_FIRMWARE=<your_firmware_install_path>\voe-4.0-win_amd64\1x4.xclbin
34
  set NUM_OF_DPU_RUNNERS=1
35
 
36
  # save below sample script as utf8 and llama-3.1-test.py
 
78
  torch.set_num_threads(4)
79
 
80
  tokenizer = AutoTokenizer.from_pretrained("llama3.1-8b-Instruct-amd-npu")
81
+ ckpt = r"llama3.1-8b-Instruct-amd-npu\llama3.1_8b_w_bit_4_awq_amd.pt"
82
  terminators = [
83
  tokenizer.eos_token_id,
84
  tokenizer.convert_tokens_to_ids("<|eot_id|>")