Manoj21k committed
Commit b9c3d84
1 Parent(s): c20f32a

Update app.py

Files changed (1)
  app.py +19 -54
app.py CHANGED
@@ -1,11 +1,7 @@
 import streamlit as st
 from tempfile import NamedTemporaryFile
-from langchain.document_loaders import PyPDFLoader
-from langchain.embeddings import HuggingFaceEmbeddings
-from langchain.vectorstores import Chroma
-from langchain import PromptTemplate, LLMChain
-from langchain.llms import HuggingFaceHub
 import os
+from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
 
 # Function to save the uploaded PDF to a temporary file
 def save_uploaded_file(uploaded_file):
@@ -13,17 +9,6 @@ def save_uploaded_file(uploaded_file):
     temp_file.write(uploaded_file.read())
     return temp_file.name
 
-# Initialize the model and other resources outside the main function
-@st.cache(allow_output_mutation=True)
-def initialize_model():
-    # Initialize the HuggingFaceHub with the appropriate task
-    llm = HuggingFaceHub(
-        repo_id="deepset/roberta-base-squad2",
-        model_kwargs={"temperature": 1e-10},
-        huggingfacehub_api_token="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"
-    )
-    return llm
-
 # Streamlit UI
 st.title("PDF Question Answering App")
 uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
@@ -31,49 +16,29 @@ if uploaded_file is not None:
     # Save the uploaded file to a temporary location
     temp_file_path = save_uploaded_file(uploaded_file)
 
-    # Load the PDF document using PyPDFLoader
-    loader = PyPDFLoader(temp_file_path)
-    pages = loader.load_and_split()
-
-    # Initialize embeddings and Chroma
-    embed = HuggingFaceEmbeddings()
-    db = Chroma.from_documents(pages, embed)
-
-    # Load the model using the cached function
-    llm = initialize_model()
+    # Load the pre-trained question-answering model and tokenizer
+    model_name = "deepset/roberta-base-squad2"
+    model = AutoModelForQuestionAnswering.from_pretrained(model_name)
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
 
     # Define a function to get answers
-    def get_answer(question):
-        doc = db.similarity_search(question, k=4)
-        context = doc[0].page_content + doc[1].page_content + doc[2].page_content + doc[3].page_content
-
-        max_seq_length = 512  # You may define this based on your model
-
-        context = context[:max_seq_length]
-
-        # Prompt template
-        template = """Use the following pieces of context to answer the question at the end.
-        If you don't know the answer, just say that you don't know, don't try to make up an answer.
-        Context: {context} and
-        Question: {question}
-        Answer: """
-
-        prompt = PromptTemplate(template=template, input_variables=["context", "question"]).partial(context=context)
-
-        llm_chain = LLMChain(prompt=prompt, llm=llm)
-        output = llm_chain.run(context=context, question=question, max_length=512)
-
-        answer_index = output.find("Answer:")
-        next_line_index = output.find("\n", answer_index)
-        answer_content = output[answer_index + len("Answer:")-1:next_line_index]
-
-        return answer_content
+    def get_answer(question, context):
+        inputs = tokenizer(question, context, return_tensors="pt")
+        start_scores, end_scores = model(**inputs)
+        answer_start = start_scores.argmax()
+        answer_end = end_scores.argmax() + 1
+        answer = tokenizer.decode(inputs["input_ids"][0][answer_start:answer_end])
+        return answer
 
     question = st.text_input("Enter your question:")
+    context = st.text_area("Enter the context:")
     if st.button("Get Answer"):
-        answer = get_answer(question)
-        st.write("Answer:")
-        st.write(answer)
+        if not context:
+            st.warning("Please provide context for the question.")
+        else:
+            answer = get_answer(question, context)
+            st.write("Answer:")
+            st.write(answer)
 
     # Cleanup: Delete the temporary file
     os.remove(temp_file_path)
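
One caveat on the new get_answer: with recent versions of transformers, model(**inputs) returns a QuestionAnsweringModelOutput rather than a plain tuple, so the line start_scores, end_scores = model(**inputs) unpacks to field names instead of logit tensors and the subsequent .argmax() calls fail. A minimal sketch of a variant that reads the logits as attributes, caps the input at the model's 512-token window, and runs inference without gradients; the truncation settings and the skip_special_tokens flag are additions for illustration, not part of the commit:

import torch
from transformers import AutoModelForQuestionAnswering, AutoTokenizer

model_name = "deepset/roberta-base-squad2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForQuestionAnswering.from_pretrained(model_name)

def get_answer(question, context):
    # Cap question + context at the model's 512-token limit so long inputs don't error out
    inputs = tokenizer(question, context, return_tensors="pt",
                       truncation=True, max_length=512)
    with torch.no_grad():  # inference only; no gradients needed
        outputs = model(**inputs)
    # The span logits are attributes of QuestionAnsweringModelOutput
    answer_start = outputs.start_logits.argmax()
    answer_end = outputs.end_logits.argmax() + 1
    return tokenizer.decode(inputs["input_ids"][0][answer_start:answer_end],
                            skip_special_tokens=True)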
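
Note also that pipeline is imported in the new version but never used. The same model can be driven through the question-answering pipeline, which handles tokenization, chunking of contexts longer than the model's window, and answer decoding internally. A sketch of that alternative, with illustrative question and context strings:

from transformers import pipeline

qa = pipeline("question-answering", model="deepset/roberta-base-squad2")
result = qa(question="Who wrote the report?",
            context="The report was written by Jane Doe in 2021.")
print(result["answer"])  # illustrative example; prints "Jane Doe"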