# Spaces: Sleeping
# File size: 2,310 Bytes
# Commits: 5207833 0cb6039 d1cacb1 0cb6039 d1cacb1 5207833
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# Hub repository and the weights file inside it (the model ships as a .bin file).
model_name_or_path = "TheBloke/Llama-2-13B-chat-GGML"
model_basename = "llama-2-13b-chat.ggmlv3.q5_1.bin"

# Fetch the model file from the Hub; the returned value is its local path.
model_path = hf_hub_download(
    filename=model_basename,
    repo_id=model_name_or_path,
)

# Build the shared Llama instance once at import time with GPU-oriented
# settings; generate_email_response() below calls this object directly.
lcpp_llm = Llama(
    model_path=model_path,
    n_gpu_layers=32,  # layers to run on the GPU; depends on available VRAM
    n_batch=512,  # processing batch size; tune to VRAM capacity
    n_threads=2,  # CPU cores to use
)
def generate_email_response(email_prompt: str) -> str:
    """Draft a professional reply to an email with the module-level Llama model.

    The email text is wrapped in a fixed instruction prompt and passed to
    ``lcpp_llm``. The text of the first completion choice is returned with
    surrounding whitespace removed.

    Args:
        email_prompt: The email content (or a shorthand request such as
            "email to ...") that should be answered.

    Returns:
        The generated reply. When the input is empty, blank or not a string,
        or when generation fails for any reason, a short human-readable
        message is returned instead of raising.
    """
    # Check input received by the function
    print("Received prompt:", email_prompt)

    # Nothing to answer: skip the model call entirely.
    if not isinstance(email_prompt, str) or not email_prompt.strip():
        return "No email content provided. Please enter the email text to respond to."

    # Shorthand requests ("email to ...") and pasted emails use the same
    # instruction prompt, so no branching on the input is needed.
    formatted_prompt = (
        f'\nEmail received: "{email_prompt}"\n'
        "Respond to this email, ensuring a professional tone, providing a "
        "concise update, and addressing any potential concerns the sender "
        "might have.\n"
        "Response:\n"
    )

    # Generate response using Llama-2 model
    try:
        response = lcpp_llm(
            prompt=formatted_prompt,
            max_tokens=256,
            temperature=0.5,
            top_p=0.95,
            repeat_penalty=1.2,
            top_k=150,
            # Request only the completion. Echoing the prompt and removing it
            # afterwards with str.replace() leaves the prompt in the output
            # whenever the echoed text is not an exact match.
            echo=False,
        )
        generated_response = response["choices"][0]["text"].strip()
    except Exception as e:
        # Boundary for callers: report the problem on the console and hand
        # back a message rather than propagating the exception.
        print("Error in response generation:", str(e))
        return "Failed to generate response, please check the console for errors."

    print("Generated response:", generated_response)
    return generated_response