from langchain_community.chat_models import ChatOllama
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough

# Create embeddings and load the FAISS vector store
vector_db_path = "vectorstores/db_faiss"
# embeddings = OllamaEmbeddings(model="nomic-embed-text", show_progress=False)
embeddings = GPT4AllEmbeddings(
    model_name="all-MiniLM-L6-v2.gguf2.f16.gguf",
    gpt4all_kwargs={"allow_download": False},
)
db = FAISS.load_local(vector_db_path, embeddings, allow_dangerous_deserialization=True)

# Create retriever over the vector store
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 256},  # number of chunks to retrieve per question
)

# Create Ollama language model - Gemma 2
local_llm = "alen_ox"
llm = ChatOllama(
    model=local_llm,
    keep_alive="3h",
    num_predict=512,  # Ollama's parameter for capping the response length
    temperature=0,
)

# Create prompt template
template = """Answer the question based ONLY on the following context; if the answer is not in the context, say there is no answer:
{context}
Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# Create the RAG chain using LCEL with streaming output
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
)

# Function to ask questions, streaming the answer as it is generated
def ask_question(question):
    response = ""
    print("Answer:\n\n", end=" ", flush=True)
    for chunk in rag_chain.stream(question):
        response += chunk.content
        print(chunk.content, end="", flush=True)
    print("\n")
    return response

# Example usage
if __name__ == "__main__":
    while True:
        user_question = input("Ask a question (or type 'quit' to exit): ")
        if user_question.lower() == "quit":
            break
        # The answer is already streamed inside ask_question; keep the return value if needed
        answer = ask_question(user_question)
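
# ---------------------------------------------------------------------------
# Note: this script assumes a FAISS index already exists at vectorstores/db_faiss.
# Below is a minimal, commented-out sketch of how such an index could be built
# with the same GPT4All embedding model. The "data" folder of PDF files and the
# chunking parameters are assumptions for illustration, not part of the original
# script.
#
# from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
# from langchain.text_splitter import RecursiveCharacterTextSplitter
#
# loader = DirectoryLoader("data", glob="*.pdf", loader_cls=PyPDFLoader)  # hypothetical source folder
# documents = loader.load()
# splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)
# chunks = splitter.split_documents(documents)
# db = FAISS.from_documents(chunks, embeddings)  # reuses the GPT4AllEmbeddings instance above
# db.save_local(vector_db_path)
# ---------------------------------------------------------------------------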