update llama model
- app.py +0 -1
- llama2_response_mail_generator.py +4 -0
app.py
CHANGED
@@ -78,7 +78,6 @@ print('finished download...')
 # Initialize the Llama model with appropriate settings for GPU
 lcpp_llm = Llama(
     model_path=model_path_llama,
-    n_threads=2, # CPU cores to use
     n_batch=512, # Batch size for processing; adjust as per your VRAM capacity
 )
 
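For context, omitting n_threads leaves thread selection to llama-cpp-python's library default. A minimal sketch of an equivalent GPU-oriented initialization follows; n_gpu_layers and its value are illustrative assumptions, not part of this commit:

from llama_cpp import Llama

# Sketch only: n_gpu_layers and its value are assumptions, not from the commit.
# Omitting n_threads lets the library pick a default CPU thread count.
lcpp_llm = Llama(
    model_path=model_path_llama,  # set earlier in app.py
    n_batch=512,                  # batch size; tune to available VRAM
    n_gpu_layers=32,              # offload layers to the GPU when built with GPU support
)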
llama2_response_mail_generator.py
CHANGED
@@ -2,7 +2,11 @@ from huggingface_hub import hf_hub_download
 
 from llama_cpp import Llama
 
+model_name_or_path = "TheBloke/Llama-2-13B-chat-GGML"
+model_basename = "llama-2-13b-chat.ggmlv3.q5_1.bin" # The model is in bin format
 
+# Download the model file
+model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)
 
 # Initialize the Llama model with appropriate settings for GPU
 lcpp_llm = Llama(
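For reference, a self-contained sketch of how the downloaded model might be exercised end to end. The prompt and sampling values are illustrative assumptions, and recent llama-cpp-python releases expect GGUF files, so loading this GGML binary assumes an older release:

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Same download call as the commit above.
model_path = hf_hub_download(
    repo_id="TheBloke/Llama-2-13B-chat-GGML",
    filename="llama-2-13b-chat.ggmlv3.q5_1.bin",
)

# Prompt and sampling parameters are illustrative, not from the commit.
llm = Llama(model_path=model_path, n_batch=512)
response = llm(
    "Write a short, polite follow-up email to a client.",
    max_tokens=256,
    temperature=0.7,
)

# llama-cpp-python returns an OpenAI-style completion dict.
print(response["choices"][0]["text"])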