Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,12 @@
|
|
1 |
import streamlit as st
|
2 |
-
from tempfile import NamedTemporaryFile
|
3 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
|
5 |
|
6 |
# Function to save the uploaded PDF to a temporary file
|
@@ -16,29 +22,52 @@ if uploaded_file is not None:
|
|
16 |
# Save the uploaded file to a temporary location
|
17 |
temp_file_path = save_uploaded_file(uploaded_file)
|
18 |
|
19 |
-
# Load the
|
20 |
-
|
21 |
-
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
# Define a function to get answers
|
25 |
-
def get_answer(question
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
question = st.text_input("Enter your question:")
|
34 |
-
context = st.text_area("Enter the context:")
|
35 |
if st.button("Get Answer"):
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
answer = get_answer(question, context)
|
40 |
-
st.write("Answer:")
|
41 |
-
st.write(answer)
|
42 |
|
43 |
# Cleanup: Delete the temporary file
|
44 |
os.remove(temp_file_path)
|
|
|
1 |
import streamlit as st
|
|
|
2 |
import os
|
3 |
+
from tempfile import NamedTemporaryFile
|
4 |
+
from langchain.document_loaders import PyPDFLoader
|
5 |
+
from langchain.embeddings import HuggingFaceEmbeddings
|
6 |
+
from langchain.vectorstores import Chroma
|
7 |
+
from langchain.llms import GPT4All
|
8 |
+
from langchain.chains.question_answering import load_qa_chain
|
9 |
+
from langchain import PromptTemplate, LLMChain
|
10 |
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
|
11 |
|
12 |
# Function to save the uploaded PDF to a temporary file
|
|
|
22 |
# Save the uploaded file to a temporary location
|
23 |
temp_file_path = save_uploaded_file(uploaded_file)
|
24 |
|
25 |
+
# Load the PDF document using PyPDFLoader
|
26 |
+
loader = PyPDFLoader(temp_file_path)
|
27 |
+
pages = loader.load_and_split()
|
28 |
+
|
29 |
+
# Initialize embeddings and Chroma
|
30 |
+
embed = HuggingFaceEmbeddings()
|
31 |
+
db = Chroma.from_documents(pages, embed)
|
32 |
+
|
33 |
+
# Initialize the local GPT4All model (loaded from a converted weights file, not OpenAI GPT-4)
|
34 |
+
llm = GPT4All(
|
35 |
+
model="./gpt4all-converted.bin"
|
36 |
+
)
|
37 |
|
38 |
# Define a function to get answers
|
39 |
+
def get_answer(question):
|
40 |
+
doc = db.similarity_search(question, k=4)
|
41 |
+
context = doc[0].page_content + doc[1].page_content + doc[2].page_content + doc[3].page_content
|
42 |
+
|
43 |
+
max_seq_length = 512 # Cap applied by the string slice below, so it counts characters, not model tokens
|
44 |
+
|
45 |
+
context = context[:max_seq_length]
|
46 |
+
|
47 |
+
# Load the model & tokenizer for question-answering
|
48 |
+
model_name = "deepset/roberta-base-squad2"
|
49 |
+
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
|
50 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
51 |
+
|
52 |
+
# Create a question-answering pipeline
|
53 |
+
nlp = pipeline("question-answering", model=model, tokenizer=tokenizer)
|
54 |
+
|
55 |
+
# Prepare the input
|
56 |
+
QA_input = {
|
57 |
+
"question": question,
|
58 |
+
"context": context,
|
59 |
+
}
|
60 |
+
|
61 |
+
# Get the answer
|
62 |
+
result = nlp(**QA_input)
|
63 |
+
|
64 |
+
return result["answer"]
|
65 |
|
66 |
question = st.text_input("Enter your question:")
|
|
|
67 |
if st.button("Get Answer"):
|
68 |
+
answer = get_answer(question)
|
69 |
+
st.write("Answer:")
|
70 |
+
st.write(answer)
|
|
|
|
|
|
|
71 |
|
72 |
# Cleanup: Delete the temporary file
|
73 |
os.remove(temp_file_path)
|