from transformers import AutoTokenizer, AutoModelForCausalLM
# from transformers import AutoModelForCausalLM, GemmaTokenizer
from langchain.prompts import PromptTemplate
import os

# os.environ["HF_TOKEN"] = os.getenv('HF_TOKEN')
model = AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct", trust_remote_code=True)

# Alternative model used for code generation (CodeGemma)
# model = AutoModelForCausalLM.from_pretrained('google/codegemma-1.1-2b', trust_remote_code=True)
# tokenizer = GemmaTokenizer.from_pretrained('google/codegemma-1.1-2b', trust_remote_code=True)


def generate_answer(question):
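    """Generate an answer to `question` with the loaded causal LM and return the decoded text."""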
    # Build the prompt template; PromptTemplate takes the template string and its input variables
    prompt_template = PromptTemplate(template="Answer the following question within 1000 words: {question}",
                                     input_variables=["question"])
    # Fill the template with the user's question
    format_prompt = prompt_template.format(question=question)
    encoded_input = tokenizer(format_prompt, return_tensors='pt')
    # Run the model; max_length covers prompt plus generated tokens (Phi-3-mini has a 4k context window)
    output = model.generate(**encoded_input, max_length=4000)
    # Decode the generated token IDs back into text, dropping special tokens
    decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)
    # response_text = decoded_output.split('\n\n', 1)
    return decoded_output
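

# Minimal usage sketch, assuming the file is run directly as a script;
# the sample question below is illustrative only and not part of the original code.
if __name__ == "__main__":
    sample_question = "Explain the difference between supervised and unsupervised learning."
    print(generate_answer(sample_question))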