mikepastor11 committed on
Commit
bc2edb8
1 Parent(s): b4ffaef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -67
app.py CHANGED
@@ -26,73 +26,73 @@ def get_pdf_text(pdf_docs):
26
 
27
  # Chunk size and overlap must not exceed the models capacity!
28
  #
29
- def get_text_chunks(text):
30
- text_splitter = CharacterTextSplitter(
31
- separator="\n",
32
- chunk_size=800, # 1000
33
- chunk_overlap=200,
34
- length_function=len
35
- )
36
- chunks = text_splitter.split_text(text)
37
- return chunks
38
-
39
-
40
- def get_vectorstore(text_chunks):
41
- # embeddings = OpenAIEmbeddings()
42
-
43
- # pip install InstructorEmbedding
44
- # pip install sentence-transformers==2.2.2
45
- embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
46
-
47
- # from InstructorEmbedding import INSTRUCTOR
48
- # model = INSTRUCTOR('hkunlp/instructor-xl')
49
- # sentence = "3D ActionSLAM: wearable person tracking in multi-floor environments"
50
- # instruction = "Represent the Science title:"
51
- # embeddings = model.encode([[instruction, sentence]])
52
-
53
- # embeddings = model.encode(text_chunks)
54
- print('have Embeddings: ')
55
-
56
- # text_chunks="this is a test"
57
- # FAISS, Chroma and other vector databases
58
- #
59
- vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
60
- print('FAISS succeeds: ')
61
-
62
- return vectorstore
63
-
64
- def get_conversation_chain(vectorstore):
65
- # llm = ChatOpenAI()
66
- # llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})
67
- # google/bigbird-roberta-base facebook/bart-large
68
- llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 0.5, "max_length": 512})
69
-
70
- memory = ConversationBufferMemory(
71
- memory_key='chat_history', return_messages=True)
72
- conversation_chain = ConversationalRetrievalChain.from_llm(
73
- llm=llm,
74
- retriever=vectorstore.as_retriever(),
75
- memory=memory,
76
- )
77
- return conversation_chain
78
-
79
- def handle_userinput(user_question):
80
-
81
- response = st.session_state.conversation({'question': user_question})
82
- # response = st.session_state.conversation({'summarization': user_question})
83
- st.session_state.chat_history = response['chat_history']
84
-
85
-
86
- # st.empty()
87
-
88
- for i, message in enumerate(st.session_state.chat_history):
89
- if i % 2 == 0:
90
- st.write(user_template.replace(
91
- "{{MSG}}", message.content), unsafe_allow_html=True)
92
-
93
- else:
94
- st.write(bot_template.replace(
95
- "{{MSG}}", message.content), unsafe_allow_html=True)
96
 
97
 
98
 
 
26
 
27
  # Chunk size and overlap must not exceed the models capacity!
28
  #
29
+ # def get_text_chunks(text):
30
+ # text_splitter = CharacterTextSplitter(
31
+ # separator="\n",
32
+ # chunk_size=800, # 1000
33
+ # chunk_overlap=200,
34
+ # length_function=len
35
+ # )
36
+ # chunks = text_splitter.split_text(text)
37
+ # return chunks
38
+
39
+
40
+ # def get_vectorstore(text_chunks):
41
+ # # embeddings = OpenAIEmbeddings()
42
+
43
+ # # pip install InstructorEmbedding
44
+ # # pip install sentence-transformers==2.2.2
45
+ # embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
46
+
47
+ # # from InstructorEmbedding import INSTRUCTOR
48
+ # # model = INSTRUCTOR('hkunlp/instructor-xl')
49
+ # # sentence = "3D ActionSLAM: wearable person tracking in multi-floor environments"
50
+ # # instruction = "Represent the Science title:"
51
+ # # embeddings = model.encode([[instruction, sentence]])
52
+
53
+ # # embeddings = model.encode(text_chunks)
54
+ # print('have Embeddings: ')
55
+
56
+ # # text_chunks="this is a test"
57
+ # # FAISS, Chroma and other vector databases
58
+ # #
59
+ # vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
60
+ # print('FAISS succeeds: ')
61
+
62
+ # return vectorstore
63
+
64
+ # def get_conversation_chain(vectorstore):
65
+ # # llm = ChatOpenAI()
66
+ # # llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})
67
+ # # google/bigbird-roberta-base facebook/bart-large
68
+ # llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 0.5, "max_length": 512})
69
+
70
+ # memory = ConversationBufferMemory(
71
+ # memory_key='chat_history', return_messages=True)
72
+ # conversation_chain = ConversationalRetrievalChain.from_llm(
73
+ # llm=llm,
74
+ # retriever=vectorstore.as_retriever(),
75
+ # memory=memory,
76
+ # )
77
+ # return conversation_chain
78
+
79
+ # def handle_userinput(user_question):
80
+
81
+ # response = st.session_state.conversation({'question': user_question})
82
+ # # response = st.session_state.conversation({'summarization': user_question})
83
+ # st.session_state.chat_history = response['chat_history']
84
+
85
+
86
+ # # st.empty()
87
+
88
+ # for i, message in enumerate(st.session_state.chat_history):
89
+ # if i % 2 == 0:
90
+ # st.write(user_template.replace(
91
+ # "{{MSG}}", message.content), unsafe_allow_html=True)
92
+
93
+ # else:
94
+ # st.write(bot_template.replace(
95
+ # "{{MSG}}", message.content), unsafe_allow_html=True)
96
 
97
 
98