import streamlit as st
from langchain.docstore.document import Document
from chromadb.config import Settings

from load_model import load_embedding
from load_vectors import load_from_file, load_and_split, create_and_add, load_from_web
from utils import retrieve_collections, get_chroma_client


def llm_module():
    """Placeholder; currently does nothing."""
    pass


def _parse_urls(raw):
    """Turn a comma-separated string of URLs into a list of clean URLs.

    Double quotes are removed, surrounding whitespace (including newlines
    typed into the text area) is stripped, and empty entries are dropped.
    """
    # Split on commas FIRST; removing the commas beforehand would glue all
    # URLs together into a single bogus entry.
    pieces = raw.replace('"', "").split(",")
    return [piece.strip() for piece in pieces if piece.strip()]


def _parse_chunk_size(raw):
    """Return ``raw`` as a positive ``int``, or ``None`` if it is not one."""
    try:
        size = int(raw)
    except (TypeError, ValueError):
        return None
    return size if size > 0 else None


def _split_and_store(docs, chunk_size, collection):
    """Split ``docs`` into chunks and add them to ``collection``.

    ``collection`` is the entry picked in the select box; it is indexed by
    ``"name"``, ``"model_name"`` and ``"metadata"``.
    """
    sub_docs = load_and_split(docs, chunk_size=chunk_size)
    create_and_add(
        collection["name"],
        sub_docs,
        collection["model_name"],
        collection["metadata"],
    )
    st.write("Upload succesful")


def _render_add_documents(client):
    """Render the UI for deleting a collection or adding documents to it."""
    collections = retrieve_collections()
    selected_collection = st.selectbox(
        'Add to exsisting collection or create a new one',
        collections,
    )

    # The select box yields nothing when there are no collections; every
    # action below needs one, so stop before dereferencing it.
    if not selected_collection:
        st.write("No collection available. Start a new collection first.")
        return

    if st.button('Delete Collection (⚠️ This is destructive and not reversible)'):
        client.delete_collection(name=selected_collection["name"])
        st.write("Collection " + str(selected_collection["name"]) + " deleted.")
        # Do not offer uploads into a collection that was just removed.
        return

    st.write("Selected Vectorstore:", selected_collection)
    source = st.radio(
        "",
        options=["Upload Files from Local", "Upload Files from Web"],
    )

    if source == "Upload Files from Local":
        st.write('Source Documents:')
        uploaded_files = st.file_uploader("Choose a PDF file", accept_multiple_files=True)
        raw_chunk_size = st.text_area('chunk Size:', 1000)
        if st.button('Upload'):
            chunk_size = _parse_chunk_size(raw_chunk_size)
            if chunk_size is None:
                st.error("Chunk size must be a positive whole number.")
            elif not uploaded_files:
                st.error("Please choose at least one file to upload.")
            else:
                docs = load_from_file(uploaded_files)
                _split_and_store(docs, chunk_size, selected_collection)
    else:
        st.write('Urls of Source Documents (Comma separated):')
        raw_urls = st.text_area('Urls:', '')
        raw_chunk_size = st.text_area('chunk Size:', 1000)
        if st.button('Upload'):
            chunk_size = _parse_chunk_size(raw_chunk_size)
            urls = _parse_urls(raw_urls)
            if chunk_size is None:
                st.error("Chunk size must be a positive whole number.")
            elif not urls:
                st.error("Please enter at least one URL.")
            else:
                docs = load_from_web(urls)
                _split_and_store(docs, chunk_size, selected_collection)


def _render_new_collection(client):
    """Render the UI for creating a new, empty collection."""
    collection = st.text_area('Name of your new collection:', '').strip()
    model_name = st.text_area('Choose the embedding function:', "hkunlp/instructor-large")
    if st.button('Create'):
        if len(collection) > 3:
            ef = load_embedding(model_name)
            # NOTE(review): "loaded_docs" is a list; some Chroma versions may
            # only accept scalar metadata values - confirm against the
            # installed chromadb version.
            metadata = {
                "loaded_docs": [],
                "Subject": "Terms Example",
                "model_name": ef.model_name,
            }
            client.create_collection(collection, embedding_function=ef, metadata=metadata)
            st.write("Collection " + collection + " succesfully created.")
        else:
            # Previously a too-short name was ignored without any feedback.
            st.error("Collection name must be longer than 3 characters.")


def load_files():
    """Render the document-management page.

    Lets the user either add documents (from local files or from URLs) to an
    existing collection, delete an existing collection, or create a new
    collection with a chosen embedding model.
    """
    client = get_chroma_client()
    option = st.radio(
        "",
        options=["Add Documents", "Start new collection"],
    )
    if option == "Add Documents":
        _render_add_documents(client)
    else:
        _render_new_collection(client)