from transformers import AutoTokenizer, AutoModelForCausalLM
# from transformers import AutoModelForCausalLM, GemmaTokenizer
from langchain.prompts import PromptTemplate
import os
# os.environ["HF_TOKEN"] = os.getenv('HF_TOKEN')
model = AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct", trust_remote_code=True)
# Alternative model for code-generation tasks
# model = AutoModelForCausalLM.from_pretrained('google/codegemma-1.1-2b', trust_remote_code=True)
# tokenizer = GemmaTokenizer.from_pretrained('google/codegemma-1.1-2b', trust_remote_code=True)
def generate_answer(question):
    # Build the prompt from a simple template
    prompt_template = PromptTemplate(
        template="Answer the following question within 1000 words: {question}",
        input_variables=["question"],
    )
    formatted_prompt = prompt_template.format(question=question)
    # Tokenize the prompt
    encoded_input = tokenizer(formatted_prompt, return_tensors='pt')
    # Run the model
    output = model.generate(**encoded_input, max_length=4000)
    # Decode the model output to text, dropping special tokens such as <|end|>
    decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)
    # response_text = decoded_output.split('\n\n', 1)  # optional: split off the echoed prompt
    return decoded_output
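

# --- Example usage (a minimal sketch; the question string below is illustrative only) ---
# Generation on CPU can be slow for a ~3.8B-parameter model; if a GPU is available,
# moving the model there with model.to("cuda") and the inputs likewise is a common option.
if __name__ == "__main__":
    sample_question = "What is the difference between a list and a tuple in Python?"
    print(generate_answer(sample_question))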