Ritvik19 committed on
Commit
60e8923
1 Parent(s): 80dcc5f

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +92 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+
4
+ from langchain.chains import ConversationalRetrievalChain
5
+ from langchain.embeddings import OpenAIEmbeddings
6
+ from langchain.vectorstores import Chroma
7
+ from langchain.llms.openai import OpenAIChat
8
+ from langchain.document_loaders import PyPDFLoader, WebBaseLoader
9
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
10
+ from langchain.embeddings.openai import OpenAIEmbeddings
11
+
12
+ import streamlit as st
13
+
14
+
15
# Directory named "vector_store" next to this file; passed to Chroma as its
# persist_directory in embeddings_on_local_vectordb().
LOCAL_VECTOR_STORE_DIR = Path(__file__).resolve().parent.joinpath("vector_store")
16
+
17
+
18
def load_documents():
    """Load every source listed in ``st.session_state.source_doc_urls``.

    Each non-blank entry is handed to ``PyPDFLoader`` when it points at a PDF
    and to ``WebBaseLoader`` otherwise. An entry counts as a PDF when either
    the raw string or its URL path ends in ``.pdf``, compared
    case-insensitively, so ``REPORT.PDF`` and ``paper.pdf?download=1`` are
    both treated as PDFs.

    Returns:
        A flat list with the documents produced by every loader, in the order
        the sources were listed. Empty when no usable source was given.

    Raises:
        Whatever the underlying loaders raise while fetching or parsing.
    """
    # Function-scope import so this block does not depend on a file-level change.
    from urllib.parse import urlparse

    documents = []
    for source_doc_url in st.session_state.source_doc_urls:
        # Blank entries come from empty input or stray commas; there is
        # nothing to fetch for them.
        if not source_doc_url:
            continue
        # The raw-suffix check keeps every case the original accepted; the
        # path check additionally ignores query strings and fragments.
        is_pdf = source_doc_url.lower().endswith(".pdf") or urlparse(
            source_doc_url
        ).path.lower().endswith(".pdf")
        loader = (
            PyPDFLoader(source_doc_url) if is_pdf else WebBaseLoader(source_doc_url)
        )
        documents.extend(loader.load())
    return documents
29
+
30
+
31
def split_documents(documents):
    """Split ``documents`` into overlapping chunks.

    A ``RecursiveCharacterTextSplitter`` configured with ``chunk_size=1024``
    and ``chunk_overlap=256`` does the splitting.

    Args:
        documents: The loaded documents to split.

    Returns:
        The chunks returned by the splitter's ``split_documents``.
    """
    return RecursiveCharacterTextSplitter(
        chunk_size=1024,
        chunk_overlap=256,
    ).split_documents(documents)
35
+
36
+
37
def embeddings_on_local_vectordb(texts):
    """Index ``texts`` in a persisted Chroma store and return a retriever.

    The store is built with ``OpenAIEmbeddings`` and persisted under
    ``LOCAL_VECTOR_STORE_DIR``.

    Args:
        texts: Document chunks to embed and index.

    Returns:
        A retriever over the store, created with ``search_kwargs={"k": 3}``.
    """
    embedder = OpenAIEmbeddings()
    store_dir = LOCAL_VECTOR_STORE_DIR.as_posix()
    store = Chroma.from_documents(
        texts, embedding=embedder, persist_directory=store_dir
    )
    store.persist()
    return store.as_retriever(search_kwargs={"k": 3})
46
+
47
+
48
def query_llm(retriever, query):
    """Answer ``query`` with a conversational retrieval chain and record the turn.

    A ``ConversationalRetrievalChain`` is built from ``OpenAIChat`` and the
    given retriever, then called with the question and the chat history held
    in ``st.session_state.messages``. Source documents are requested from the
    chain, but only the ``"answer"`` field is used.

    Args:
        retriever: Retriever passed to the chain.
        query: The user's question.

    Returns:
        The chain's answer. The ``(query, answer)`` pair is also appended to
        ``st.session_state.messages``.
    """
    chain = ConversationalRetrievalChain.from_llm(
        llm=OpenAIChat(), retriever=retriever, return_source_documents=True
    )
    payload = {"question": query, "chat_history": st.session_state.messages}
    answer = chain(payload)["answer"]
    st.session_state.messages.append((query, answer))
    return answer
58
+
59
+
60
def input_fields():
    """Check for the OpenAI credential and read source URLs from the sidebar.

    The API key is expected in the ``OPENAI_API_KEY`` environment variable
    (for example, set as a deployment secret). When it is missing, a sidebar
    warning is shown instead of failing here.

    The "Source Document URLs" sidebar field is split on commas; each entry is
    stripped and blank entries are dropped. The resulting list is stored in
    ``st.session_state.source_doc_urls``.
    """
    # SECURITY: an API key must never be committed to source control. The key
    # that used to be assigned here is exposed in the repository history and
    # should be revoked; supply a fresh one via the environment instead.
    if not os.environ.get("OPENAI_API_KEY"):
        st.sidebar.warning(
            "OPENAI_API_KEY is not set. Configure it as an environment "
            "variable or deployment secret."
        )

    raw_urls = st.sidebar.text_input("Source Document URLs")
    # "".split(",") yields [""], so filter out blanks (also covers stray or
    # trailing commas) rather than passing empty URLs downstream.
    st.session_state.source_doc_urls = [
        url.strip() for url in raw_urls.split(",") if url.strip()
    ]
65
+
66
+
67
def process_documents():
    """Load, split and index the configured sources.

    On success the resulting retriever is stored in
    ``st.session_state.retriever``. Any exception raised along the way is
    reported through ``st.error`` rather than propagated.
    """
    try:
        chunks = split_documents(load_documents())
        st.session_state.retriever = embeddings_on_local_vectordb(chunks)
    except Exception as e:
        # UI boundary: show the failure in the page instead of raising.
        st.error(f"An error occurred: {e}")
74
+
75
+
76
def boot():
    """Render the chatbot page and handle one chat turn.

    Draws the title, the sidebar inputs and the "Submit Documents" button
    (which runs ``process_documents`` on click), replays the stored
    conversation, and answers a new chat input through ``query_llm``.

    If a question arrives before any documents have been processed, there is
    no retriever in the session state yet. A warning is shown and the question
    is not sent to the LLM.
    """
    st.title("Enigma Chatbot")
    input_fields()
    st.sidebar.button("Submit Documents", on_click=process_documents)

    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Each stored message is a (human_text, ai_text) pair.
    for human_text, ai_text in st.session_state.messages:
        st.chat_message("human").write(human_text)
        st.chat_message("ai").write(ai_text)

    if query := st.chat_input():
        st.chat_message("human").write(query)
        # The retriever only exists after process_documents() has succeeded;
        # reading it unconditionally raised AttributeError on a fresh session.
        retriever = st.session_state.get("retriever")
        if retriever is None:
            st.warning(
                "Please submit at least one source document before asking a question."
            )
            return
        response = query_llm(retriever, query)
        st.chat_message("ai").write(response)
89
+
90
+
91
# Entry point: render the app only when this file is executed as the main module.
if __name__ == "__main__":
    boot()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ openai==0.28
2
+ langchain==0.1.1
3
+ pypdf==4.0.0
4
+ chromadb==0.4.22