from transformers import AutoTokenizer, AutoModelForCausalLM
# from transformers import AutoModelForCausalLM, GemmaTokenizer
from langchain.prompts import PromptTemplate
import os

# os.environ["HF_TOKEN"] = os.getenv('HF_TOKEN')

# Load the instruction-tuned Phi-3 model and its tokenizer
model = AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct", trust_remote_code=True)

# Model used in code generation
# model = AutoModelForCausalLM.from_pretrained('google/codegemma-1.1-2b', trust_remote_code=True)
# tokenizer = GemmaTokenizer.from_pretrained('google/codegemma-1.1-2b', trust_remote_code=True)


def generate_answer(question):
    # Build the prompt template; the question is the only input variable
    prompt_template = PromptTemplate(
        template="Answer the following question within 1000 words: {question}",
        input_variables=["question"],
    )

    # Format the prompt and tokenize it
    format_prompt = prompt_template.format(question=question)
    encoded_input = tokenizer(format_prompt, return_tensors='pt')

    # Run the model
    output = model.generate(**encoded_input, max_length=4000)  # Use generate method for text generation

    # Decode the model output to text
    decoded_output = tokenizer.decode(output[0])
    # response_text = decoded_output.split('\n\n', 1)

    return decoded_output
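
A minimal usage sketch follows; the question string is only an illustrative placeholder, not taken from the original code.

# Example call (hypothetical question string), printing the decoded model output
if __name__ == "__main__":
    answer = generate_answer("What is the difference between supervised and unsupervised learning?")
    print(answer)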