Manoj21k committed on
Commit 405d7a9
1 Parent(s): b9c3d84

Update app.py

Files changed (1):
  app.py +48 -19
app.py CHANGED
@@ -1,6 +1,12 @@
 import streamlit as st
-from tempfile import NamedTemporaryFile
 import os
+from tempfile import NamedTemporaryFile
+from langchain.document_loaders import PyPDFLoader
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import Chroma
+from langchain.llms import GPT4All
+from langchain.chains.question_answering import load_qa_chain
+from langchain import PromptTemplate, LLMChain
 from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
 
 # Function to save the uploaded PDF to a temporary file
@@ -16,29 +22,52 @@ if uploaded_file is not None:
     # Save the uploaded file to a temporary location
     temp_file_path = save_uploaded_file(uploaded_file)
 
-    # Load the pre-trained question-answering model and tokenizer
-    model_name = "deepset/roberta-base-squad2"
-    model = AutoModelForQuestionAnswering.from_pretrained(model_name)
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    # Load the PDF document using PyPDFLoader
+    loader = PyPDFLoader(temp_file_path)
+    pages = loader.load_and_split()
+
+    # Initialize embeddings and Chroma
+    embed = HuggingFaceEmbeddings()
+    db = Chroma.from_documents(pages, embed)
+
+    # Initialize the GPT4All model
+    llm = GPT4All(
+        model="./gpt4all-converted.bin"
+    )
 
     # Define a function to get answers
-    def get_answer(question, context):
-        inputs = tokenizer(question, context, return_tensors="pt")
-        start_scores, end_scores = model(**inputs)
-        answer_start = start_scores.argmax()
-        answer_end = end_scores.argmax() + 1
-        answer = tokenizer.decode(inputs["input_ids"][0][answer_start:answer_end])
-        return answer
+    def get_answer(question):
+        doc = db.similarity_search(question, k=4)
+        context = doc[0].page_content + doc[1].page_content + doc[2].page_content + doc[3].page_content
+
+        max_seq_length = 512  # You may define this based on your model
+
+        context = context[:max_seq_length]
+
+        # Load the model & tokenizer for question-answering
+        model_name = "deepset/roberta-base-squad2"
+        model = AutoModelForQuestionAnswering.from_pretrained(model_name)
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+        # Create a question-answering pipeline
+        nlp = pipeline("question-answering", model=model, tokenizer=tokenizer)
+
+        # Prepare the input
+        QA_input = {
+            "question": question,
+            "context": context,
+        }
+
+        # Get the answer
+        result = nlp(**QA_input)
+
+        return result["answer"]
 
     question = st.text_input("Enter your question:")
-    context = st.text_area("Enter the context:")
     if st.button("Get Answer"):
-        if not context:
-            st.warning("Please provide context for the question.")
-        else:
-            answer = get_answer(question, context)
-            st.write("Answer:")
-            st.write(answer)
+        answer = get_answer(question)
+        st.write("Answer:")
+        st.write(answer)
 
     # Cleanup: Delete the temporary file
     os.remove(temp_file_path)
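
A note on the surrounding code: the save_uploaded_file helper called on both sides of the diff is defined outside the hunks shown, so its body does not appear here. A minimal sketch of such a helper, assuming it writes the Streamlit upload to a NamedTemporaryFile (consistent with the retained tempfile import) and returns the path; the exact body is inferred, not taken from the commit:

import streamlit as st
from tempfile import NamedTemporaryFile

def save_uploaded_file(uploaded_file):
    # Persist the upload to a temp .pdf; delete=False keeps the file on
    # disk so PyPDFLoader can reopen it later, and the app's final
    # os.remove(temp_file_path) handles cleanup.
    with NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(uploaded_file.getvalue())
        return tmp.name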
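
One caveat in the new get_answer: context[:max_seq_length] truncates by characters, while the 512 limit for deepset/roberta-base-squad2 is measured in tokens, so most of the four retrieved chunks is dropped before the model sees them. The transformers question-answering pipeline can window a long context itself; a sketch of that variant using the pipeline's own chunking parameters (the values shown are the library defaults, used here for illustration):

# Pass the full retrieved context and let the pipeline split it into
# overlapping token windows instead of hard-truncating characters.
result = nlp(
    question=question,
    context=context,   # full concatenation of the retrieved chunks
    max_seq_len=384,   # tokens per window
    doc_stride=128,    # token overlap between consecutive windows
)
answer = result["answer"]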
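
Separately, get_answer rebuilds the model, tokenizer, and pipeline on every button press, which reloads the weights for each question. A sketch of hoisting that work out of the handler with Streamlit's resource cache (assuming a Streamlit version that provides st.cache_resource):

import streamlit as st
from transformers import pipeline

@st.cache_resource
def load_qa_pipeline():
    # Runs once per process; later reruns reuse the cached pipeline.
    return pipeline("question-answering", model="deepset/roberta-base-squad2")

nlp = load_qa_pipeline()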
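
Finally, the commit imports load_qa_chain and instantiates a GPT4All llm that get_answer never uses. Presumably they were meant to answer from the retrieved pages with the local model; a sketch of how those pieces typically connect in this LangChain API, with the chain type and call shape being assumptions rather than anything in the commit:

# Hypothetical wiring of the otherwise-unused llm and load_qa_chain:
# stuff the retrieved documents into one prompt and ask the local LLM.
chain = load_qa_chain(llm, chain_type="stuff")
docs = db.similarity_search(question, k=4)
result = chain.run(input_documents=docs, question=question)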