# basic imports
import os
import streamlit as st
from PyPDF2 import PdfReader

# langchain imports
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings

# api key config: map the 'geminiapi' secret to the env var the Google client expects
os.environ['GOOGLE_API_KEY'] = os.getenv('geminiapi', '')

# define LLM
llm_gemini = ChatGoogleGenerativeAI(model="gemini-pro")

# local folder where the FAISS index is persisted
file_name = 'all_vec_db'


## load the uploaded PDFs and concatenate their text
def pdf_loader(files):
    all_text = ''
    for file in files:
        pdf_reader = PdfReader(file)
        for page in pdf_reader.pages:
            # extract_text() can return None for image-only pages
            all_text += page.extract_text() or ''
    return all_text


## split the raw text into overlapping chunks
def chunk_creator(all_text):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=50)
    return text_splitter.split_text(all_text)


## embed the chunks and persist them in a FAISS vector store
def embedding_creator(chunks):
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    vecstore = FAISS.from_texts(chunks, embeddings)
    vecstore.save_local(file_name)


## retrieve the chunks most similar to the query from the vector store
def retrieve_similar(query):
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    vector_db = FAISS.load_local(file_name, embeddings, allow_dangerous_deserialization=True)
    return vector_db.similarity_search(query)


## answer the query with the LLM, using the retrieved chunks as context
def retrieve_answer_LLM(docs, query):
    p_template = '''
    Answer the question in as much detail as possible using only the provided context.
    If the answer is not in the provided context, just say "answer is not available in the context";
    do not provide a wrong answer.

    Context:
    {context}

    Question:
    {question}

    Answer:
    '''
    prompt = PromptTemplate(template=p_template, input_variables=['context', 'question'])
    chain = load_qa_chain(llm_gemini, chain_type='stuff', prompt=prompt)
    response = chain.invoke({"input_documents": docs, "question": query})
    return response["output_text"]


# page layout
st.header("PDF QnA BOT🤖")
st.sidebar.title("Add PDFs")
uploaded_files = st.sidebar.file_uploader("Browse", accept_multiple_files=True)
upload_button = st.sidebar.button("Upload")
sidebar_placeholder = st.sidebar.empty()
st.subheader("Enter Query:")
user_query = st.text_input("Talk to the files!")
submit_button = st.button("Submit")

# action on file upload: read, chunk, embed, and index the PDFs
if upload_button and uploaded_files:
    sidebar_placeholder.text("Reading files...")
    all_pdfs_text = pdf_loader(uploaded_files)
    sidebar_placeholder.text("Chunking text...")
    text_chunked = chunk_creator(all_pdfs_text)
    sidebar_placeholder.text("Creating text embeddings...")
    embedding_creator(text_chunked)
    sidebar_placeholder.text("Files read successfully!")

# action on query submission: retrieve similar chunks and ask the LLM
if submit_button and user_query:
    pbar = st.progress(25, text="Retrieving similar chunks...")
    docs = retrieve_similar(user_query)
    pbar.progress(50, text="Querying LLM...")
    result = retrieve_answer_LLM(docs, user_query)
    pbar.progress(100, text="Displaying results...")
    st.subheader("Results:")
    st.write(result)
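
# Usage note (assumptions: the file is saved as app.py — any name works — and the
# 'geminiapi' secret/env var holds a valid Google AI Studio API key):
#   streamlit run app.py
# Upload PDFs in the sidebar and click "Upload" to build the index, then ask a
# question and click "Submit". The FAISS index in 'all_vec_db' persists between
# runs, so previously indexed PDFs stay queryable.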