from langchain_community.chat_models import ChatOllama
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough

# Create embeddings and load the FAISS vector store
vector_db_path = "vectorstores/db_faiss"
# embeddings = OllamaEmbeddings(model="nomic-embed-text", show_progress=False)
embeddings = GPT4AllEmbeddings(
    model_name="all-MiniLM-L6-v2.gguf2.f16.gguf",
    gpt4all_kwargs={"allow_download": False},
)
db = FAISS.load_local(vector_db_path, embeddings, allow_dangerous_deserialization=True)

# Create retriever over the vector store
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 256},  # number of chunks to retrieve per question
)

# Create Ollama language model - Gemma 2
local_llm = "alen_ox"
llm = ChatOllama(
    model=local_llm,
    keep_alive="3h",
    num_predict=512,  # Ollama's parameter for capping the response length
    temperature=0,
)

# Create prompt template
template = """Answer the question based ONLY on the following context; if the answer is not in the context, say there is no answer:
{context}
Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# Create the RAG chain using LCEL with streaming output
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
)

# Function to ask questions, streaming the answer as it is generated
def ask_question(question):
    response = ""
    print("Answer:\n\n", end=" ", flush=True)
    for chunk in rag_chain.stream(question):
        response += chunk.content
        print(chunk.content, end="", flush=True)
    print("\n")
    return response

# Example usage
if __name__ == "__main__":
    while True:
        user_question = input("Ask a question (or type 'quit' to exit): ")
        if user_question.lower() == "quit":
            break
        # The answer is already streamed inside ask_question; keep the return value if needed
        answer = ask_question(user_question)
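
# ---------------------------------------------------------------------------
# Note: this script assumes a FAISS index already exists at vectorstores/db_faiss.
# Below is a minimal, commented-out sketch of how such an index could be built
# with the same GPT4All embedding model. The "data" folder of PDF files and the
# chunking parameters are assumptions for illustration, not part of the original
# script.
#
# from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
# from langchain.text_splitter import RecursiveCharacterTextSplitter
#
# loader = DirectoryLoader("data", glob="*.pdf", loader_cls=PyPDFLoader)  # hypothetical source folder
# documents = loader.load()
# splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)
# chunks = splitter.split_documents(documents)
# db = FAISS.from_documents(chunks, embeddings)  # reuses the GPT4AllEmbeddings instance above
# db.save_local(vector_db_path)
# ---------------------------------------------------------------------------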