Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,11 +1,7 @@
|
|
1 |
import streamlit as st
|
2 |
from tempfile import NamedTemporaryFile
|
3 |
-
from langchain.document_loaders import PyPDFLoader
|
4 |
-
from langchain.embeddings import HuggingFaceEmbeddings
|
5 |
-
from langchain.vectorstores import Chroma
|
6 |
-
from langchain import PromptTemplate, LLMChain
|
7 |
-
from langchain.llms import HuggingFaceHub
|
8 |
import os
|
|
|
9 |
|
10 |
# Function to save the uploaded PDF to a temporary file
|
11 |
def save_uploaded_file(uploaded_file):
|
@@ -13,17 +9,6 @@ def save_uploaded_file(uploaded_file):
|
|
13 |
temp_file.write(uploaded_file.read())
|
14 |
return temp_file.name
|
15 |
|
16 |
-
# Initialize the model and other resources outside the main function
|
17 |
-
@st.cache(allow_output_mutation=True)
|
18 |
-
def initialize_model():
|
19 |
-
# Initialize the HuggingFaceHub with the appropriate task
|
20 |
-
llm = HuggingFaceHub(
|
21 |
-
repo_id="deepset/roberta-base-squad2",
|
22 |
-
model_kwargs={"temperature": 1e-10},
|
23 |
-
huggingfacehub_api_token="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"
|
24 |
-
)
|
25 |
-
return llm
|
26 |
-
|
27 |
# Streamlit UI
|
28 |
st.title("PDF Question Answering App")
|
29 |
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
|
@@ -31,49 +16,29 @@ if uploaded_file is not None:
|
|
31 |
# Save the uploaded file to a temporary location
|
32 |
temp_file_path = save_uploaded_file(uploaded_file)
|
33 |
|
34 |
-
# Load the
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
# Initialize embeddings and Chroma
|
39 |
-
embed = HuggingFaceEmbeddings()
|
40 |
-
db = Chroma.from_documents(pages, embed)
|
41 |
-
|
42 |
-
# Load the model using the cached function
|
43 |
-
llm = initialize_model()
|
44 |
|
45 |
# Define a function to get answers
|
46 |
-
def get_answer(question):
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
# Prompt template
|
55 |
-
template = """Use the following pieces of context to answer the question at the end.
|
56 |
-
If you don't know the answer, just say that you don't know, don't try to make up an answer.
|
57 |
-
Context: {context} and
|
58 |
-
Question: {question}
|
59 |
-
Answer: """
|
60 |
-
|
61 |
-
prompt = PromptTemplate(template=template, input_variables=["context", "question"]).partial(context=context)
|
62 |
-
|
63 |
-
llm_chain = LLMChain(prompt=prompt, llm=llm)
|
64 |
-
output = llm_chain.run(context=context, question=question, max_length=512)
|
65 |
-
|
66 |
-
answer_index = output.find("Answer:")
|
67 |
-
next_line_index = output.find("\n", answer_index)
|
68 |
-
answer_content = output[answer_index + len("Answer:")-1:next_line_index]
|
69 |
-
|
70 |
-
return answer_content
|
71 |
|
72 |
question = st.text_input("Enter your question:")
|
|
|
73 |
if st.button("Get Answer"):
|
74 |
-
|
75 |
-
|
76 |
-
|
|
|
|
|
|
|
77 |
|
78 |
# Cleanup: Delete the temporary file
|
79 |
os.remove(temp_file_path)
|
|
|
1 |
import streamlit as st
|
2 |
from tempfile import NamedTemporaryFile
|
|
|
|
|
|
|
|
|
|
|
3 |
import os
|
4 |
+
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
|
5 |
|
6 |
# Function to save the uploaded PDF to a temporary file
|
7 |
def save_uploaded_file(uploaded_file):
|
|
|
9 |
temp_file.write(uploaded_file.read())
|
10 |
return temp_file.name
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
# Streamlit UI
|
13 |
st.title("PDF Question Answering App")
|
14 |
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
|
|
|
16 |
# Save the uploaded file to a temporary location
|
17 |
temp_file_path = save_uploaded_file(uploaded_file)
|
18 |
|
19 |
+
# Load the pre-trained question-answering model and tokenizer
|
20 |
+
model_name = "deepset/roberta-base-squad2"
|
21 |
+
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
|
22 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
# Define a function to get answers
|
25 |
+
def get_answer(question, context):
    """Extract the answer span for *question* from *context*.

    Runs the surrounding script's ``tokenizer`` and ``model`` over the
    question/context pair and decodes the highest-scoring token span.

    Args:
        question: The question text.
        context: The passage in which to look for the answer.

    Returns:
        The decoded answer text with special tokens removed and surrounding
        whitespace stripped; an empty string when the predicted span is
        empty (predicted end precedes predicted start).
    """
    # Local import: the surrounding file does not import torch at the top,
    # but return_tensors="pt" already requires it to be installed.
    import torch

    # Truncate only the context (the second sequence) so an over-long passage
    # does not exceed the model's maximum input length.
    inputs = tokenizer(
        question,
        context,
        return_tensors="pt",
        truncation="only_second",
    )

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(**inputs)

    # Read the logits by attribute. Unpacking the returned output object
    # directly (``a, b = model(...)``) iterates its field names rather than
    # the tensors, which breaks the subsequent ``argmax`` calls.
    start_index = int(outputs.start_logits.argmax())
    end_index = int(outputs.end_logits.argmax()) + 1  # slice end is exclusive

    answer_ids = inputs["input_ids"][0][start_index:end_index]
    return tokenizer.decode(answer_ids, skip_special_tokens=True).strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
question = st.text_input("Enter your question:")
|
34 |
+
context = st.text_area("Enter the context:")
|
35 |
if st.button("Get Answer"):
|
36 |
+
if not context:
|
37 |
+
st.warning("Please provide context for the question.")
|
38 |
+
else:
|
39 |
+
answer = get_answer(question, context)
|
40 |
+
st.write("Answer:")
|
41 |
+
st.write(answer)
|
42 |
|
43 |
# Cleanup: Delete the temporary file
|
44 |
os.remove(temp_file_path)
|