from transformers import AutoTokenizer, AutoModelForCausalLM
# from transformers import AutoModelForCausalLM, GemmaTokenizer
from langchain.prompts import PromptTemplate
import os

# os.environ["HF_TOKEN"] = os.getenv('HF_TOKEN')

# Load the instruction-tuned Phi-3 model and its tokenizer
model = AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct", trust_remote_code=True)

# Model used in code generation
# model = AutoModelForCausalLM.from_pretrained('google/codegemma-1.1-2b', trust_remote_code=True)
# tokenizer = GemmaTokenizer.from_pretrained('google/codegemma-1.1-2b', trust_remote_code=True)
def generate_answer(question):
    # Build the prompt from a reusable template
    prompt_template = PromptTemplate(
        template="Answer the following question within 1000 words: {question}",
        input_variables=["question"],
    )
    formatted_prompt = prompt_template.format(question=question)
    # Tokenize the prompt into PyTorch tensors
    encoded_input = tokenizer(formatted_prompt, return_tensors='pt')
    # Run the model; max_length caps prompt + generated tokens, which must
    # stay within Phi-3-mini's 4k-token context window
    output = model.generate(**encoded_input, max_length=4000)
    # Decode the generated token IDs back to text, dropping special tokens
    decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)
    # response_text = decoded_output.split('\n\n', 1)
    return decoded_output
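
# Minimal usage sketch, assuming the script is run directly as a standalone
# program (the Space itself may instead wire generate_answer into a UI);
# the sample question below is illustrative only.
if __name__ == "__main__":
    print(generate_answer("Explain the difference between a list and a tuple in Python."))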