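"""PDF Question Answering App.

Pipeline: the user uploads a PDF, the pages are embedded with HuggingFace
sentence embeddings and indexed in Chroma, the most similar pages for a
question are retrieved, and an extractive QA model
(deepset/roberta-base-squad2) pulls the answer span out of the retrieved
context.

Run with `streamlit run app.py` (assuming this file is saved as app.py).
Likely dependencies, based on the imports below: streamlit, langchain,
chromadb, pypdf, transformers, sentence-transformers, torch.
"""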
import streamlit as st
import os
from tempfile import NamedTemporaryFile
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline

# Function to save the uploaded PDF to a temporary file
def save_uploaded_file(uploaded_file):
    with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        temp_file.write(uploaded_file.read())
        return temp_file.name

# Streamlit UI
st.title("PDF Question Answering App")
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
if uploaded_file is not None:
    # Save the uploaded file to a temporary location
    temp_file_path = save_uploaded_file(uploaded_file)

    # Load the PDF document using PyPDFLoader
    loader = PyPDFLoader(temp_file_path)
    pages = loader.load_and_split()

    # Initialize embeddings (HuggingFaceEmbeddings defaults to a local
    # sentence-transformers model) and index the pages in Chroma
    embed = HuggingFaceEmbeddings()
    db = Chroma.from_documents(pages, embed)
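    # Note: this index is in-memory and is rebuilt on every Streamlit rerun
    # (i.e. on each widget interaction). For large PDFs, one option is to
    # pass a persist_directory to Chroma.from_documents so the index can be
    # reused between runs.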

    # Load the QA model and tokenizer once and cache them across reruns,
    # instead of reloading them from disk on every question
    @st.cache_resource
    def load_qa_pipeline():
        model_name = "deepset/roberta-base-squad2"
        model = AutoModelForQuestionAnswering.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        return pipeline("question-answering", model=model, tokenizer=tokenizer)

    # Define a function to get answers
    def get_answer(question):
        # Retrieve the (up to) four most similar pages and join them into a
        # single context string; a short PDF may return fewer than k results
        docs = db.similarity_search(question, k=4)
        context = " ".join(d.page_content for d in docs)

        # The question-answering pipeline splits contexts longer than the
        # model's 512-token limit into overlapping chunks internally, so no
        # manual truncation is needed
        nlp = load_qa_pipeline()

        # Get the answer
        result = nlp(question=question, context=context)

        return result["answer"]

    question = st.text_input("Enter your question:")
    if st.button("Get Answer"):
        if question.strip():
            answer = get_answer(question)
            st.write("Answer:")
            st.write(answer)
        else:
            st.warning("Please enter a question first.")

    # Cleanup: Delete the temporary file
    os.remove(temp_file_path)