import os
from tempfile import NamedTemporaryFile

import streamlit as st
from langchain.document_loaders import PyPDFLoader  # moved to langchain_community in LangChain >= 0.1
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline


# Save the uploaded PDF to a temporary file and return its path
def save_uploaded_file(uploaded_file):
    with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        temp_file.write(uploaded_file.read())
        return temp_file.name


# Load the question-answering model and tokenizer once and cache them
# across Streamlit reruns, instead of reloading them on every question
@st.cache_resource
def load_qa_pipeline():
    model_name = "deepset/roberta-base-squad2"
    model = AutoModelForQuestionAnswering.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return pipeline("question-answering", model=model, tokenizer=tokenizer)


# Streamlit UI
st.title("PDF Question Answering App")

uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:
    # Save the uploaded file to a temporary location
    temp_file_path = save_uploaded_file(uploaded_file)

    # Load the PDF and split it into per-page documents
    loader = PyPDFLoader(temp_file_path)
    pages = loader.load_and_split()

    # Embed the pages and index them in an in-memory Chroma vector store
    embeddings = HuggingFaceEmbeddings()
    db = Chroma.from_documents(pages, embeddings)

    def get_answer(question):
        # Retrieve the most relevant pages and join them into one context;
        # the QA pipeline chunks contexts longer than the model's 512-token limit,
        # so no manual truncation is needed
        docs = db.similarity_search(question, k=4)
        context = "\n".join(doc.page_content for doc in docs)

        # Run extractive question answering over the retrieved context
        nlp = load_qa_pipeline()
        result = nlp(question=question, context=context)
        return result["answer"]

    question = st.text_input("Enter your question:")
    if st.button("Get Answer"):
        answer = get_answer(question)
        st.write("Answer:")
        st.write(answer)

    # Cleanup: delete the temporary file (the script reruns and recreates
    # it on each interaction, so this only removes the current copy)
    os.remove(temp_file_path)
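
# --- Usage (a minimal sketch; the filename app.py and the exact package
# list are assumptions, not part of the original script) ---
# Install the dependencies, for example:
#
#     pip install streamlit langchain chromadb pypdf sentence-transformers transformers torch
#
# then save this script as app.py and launch it with:
#
#     streamlit run app.py
#
# Streamlit prints a local URL; open it, upload a PDF, and ask a question.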