Zeta / embed_documents.py
Ritvik19's picture
Upload 5 files
7e4014b verified
raw
history blame
653 Bytes
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_openai import ChatOpenAI
from ragatouille import RAGPretrainedModel
def create_retriever(
    texts,
    *,
    model_name: str = "colbert-ir/colbertv1.9",
    index_name: str = "vector_store",
    k: int = 5,
) -> "MultiQueryRetriever":
    """Index ``texts`` with ColBERT and return a multi-query retriever over them.

    Args:
        texts: Iterable of chunk objects exposing ``page_content`` and
            ``metadata`` attributes (presumably LangChain ``Document``
            instances -- confirm against the caller). Any iterable is
            accepted, including one-shot generators.
        model_name: Name of the pretrained checkpoint handed to
            ``RAGPretrainedModel.from_pretrained``.
        index_name: Name under which the ColBERT index is created.
        k: Value forwarded to ``as_langchain_retriever`` (presumably the
            number of documents returned per query -- confirm in the
            RAGatouille docs).

    Returns:
        A ``MultiQueryRetriever`` wrapping the ColBERT retriever, using
        ``ChatOpenAI(temperature=0)`` as its LLM.

    Raises:
        ValueError: If ``texts`` contains no chunks.
    """
    # The chunks are traversed twice below (contents, then metadata), so
    # materialise them once; otherwise a generator would be exhausted by the
    # first pass and the metadata list would come out empty.
    chunks = list(texts)
    if not chunks:
        # Fail fast, before loading the pretrained model, instead of letting
        # the indexing backend fail on an empty collection.
        raise ValueError("create_retriever() requires at least one text chunk")

    colbert = RAGPretrainedModel.from_pretrained(model_name)
    colbert.index(
        collection=[chunk.page_content for chunk in chunks],
        # Chunks are passed through as-is; no further splitting is requested.
        split_documents=False,
        document_metadatas=[chunk.metadata for chunk in chunks],
        index_name=index_name,
    )

    base_retriever = colbert.as_langchain_retriever(k=k)
    return MultiQueryRetriever.from_llm(
        retriever=base_retriever,
        llm=ChatOpenAI(temperature=0),
    )