import streamlit as st
from dotenv import load_dotenv
import os
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceInstructEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain_community.llms import HuggingFaceHub
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

st.title('LLM - Retrieval Augmented Generation')

model_names = ['tiiuae/falcon-7b-instruct', 'google/gemma-2-2b', 'mistralai/Mistral-7B-v0.1']
api_urls = ['https://api-inference.huggingface.co/models/tiiuae/falcon-7b-instruct',
            'https://api-inference.huggingface.co/models/google/gemma-2-2b',
            'https://api-inference.huggingface.co/models/mistralai/Mistral-7B-v0.1']
model_dict = dict(zip(model_names, api_urls))

# user-input PDF
pdf = st.file_uploader(label='Upload PDF')

# default RAG prompt template, wrapped in instruction metatags
start_metatag = "[INST]"
default_template = """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, say that you don't know. Never stop generating mid-sentence.

Question: {question}

Context: {context}

Answer:
"""
end_metatag = "[/INST]"

# sidebar parameters
with st.sidebar:
    st.write('# Retrieval parameters')
    chunk_size = st.number_input(label='Chunk size', value=250, step=10)
    chunk_overlap = st.number_input(label='Chunk overlap', value=50, step=10)

    st.write('# Prompt')
    rag_template = st.text_area(label='Prompt template', value=default_template, height=250)

    st.write('# LLM parameters')
    model = st.selectbox(label='Model', options=model_names, index=0)
    temperature = st.slider(label='Model Temperature', min_value=0.1, max_value=float(10),
                            value=float(1), step=0.1)

# full template
template = start_metatag + '\n\n' + rag_template + '\n\n' + end_metatag

# question
question = st.text_input(label='Question')


def authenticate():
    # check that the HuggingFace API token is available to both the environment and Streamlit secrets
    try:
        st.write('Authenticated with HuggingFace:',
                 os.environ["HUGGINGFACEHUB_API_TOKEN"] == st.secrets["HUGGINGFACEHUB_API_TOKEN"])
    except KeyError:
        st.write('Cannot find HuggingFace API token. Ensure it is located in .streamlit/secrets.toml')


def load_pdf(pdf):
    # read every page of the uploaded PDF and concatenate the extracted text
    reader = PdfReader(pdf)
    page_limit = len(reader.pages)
    text = ""
    for i in range(page_limit):
        page_text = reader.pages[i].extract_text()
        text += page_text
    # if st.toggle(label='Show text'):
    #     st.write(text)
    return text


def split_text(text, chunk_size=400, chunk_overlap=20):
    # split the document into overlapping chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        separators=["\n\n", "\n", " ", ""]
    )
    # use text_splitter to split text
    chunks = text_splitter.split_text(text)
    return chunks


def store_text(chunks):
    # select model to create embeddings
    embeddings = HuggingFaceInstructEmbeddings(model_name='hkunlp/instructor-large')
    # select vectorstore, define text chunks and embeddings model
    vectorstore = FAISS.from_texts(texts=chunks, embedding=embeddings)
    return vectorstore


@st.cache_resource
def load_split_store(pdf, chunk_size, chunk_overlap):
    # load, split and store the PDF text as embeddings
    text = load_pdf(pdf=pdf)
    chunks = split_text(text, chunk_size, chunk_overlap)
    vectorstore = store_text(chunks)
    return vectorstore


def format_docs(docs):
    # join retrieved documents into a single context string
    return "\n\n".join(doc.page_content for doc in docs)


@st.cache_resource
def instantiate_llm(model, temperature):
    # instantiate the LLM hosted on the HuggingFace Hub
    llm = HuggingFaceHub(
        repo_id=model,
        model_kwargs={
            'temperature': temperature,
            # 'penalty_alpha': 2,
            # 'top_k': 50,
            # 'max_length': 1000
        }
    )
    llm.client.api_url = model_dict[str(model)]
    return llm


@st.cache_resource
def instantiate_prompt(_llm, template=template):
    # build prompt (the leading underscore keeps Streamlit from hashing the llm object)
    prompt = PromptTemplate(
        template=template,
        input_variables=['question', 'context']
    )
    return prompt


def main():
    # authenticate
    authenticate()

    # instantiate llm
    llm = instantiate_llm(model, temperature)

    # build prompt
    prompt = instantiate_prompt(_llm=llm)

    # if a PDF has been uploaded
    if pdf is not None:
        # load, split and store
        vectorstore = load_split_store(pdf, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
        st.write('PDF processed')

        # create a retriever using the vectorstore
        retriever = vectorstore.as_retriever()

        # retrieval chain: fetch relevant chunks and format them as one string
        retrieval_chain = (
            retriever
            | format_docs
        )

        # generation chain: retrieve context, fill the prompt, call the LLM, parse the output
        generation_chain = (
            {"context": retriever | format_docs, "question": RunnablePassthrough()}
            | prompt
            | llm
            | StrOutputParser()
        )

        # button press
        if st.button(label='Ask question'):
            with st.spinner('Processing'):
                # context
                st.write('# Context')
                st.write(retrieval_chain.invoke(question))

                # answer
                st.write('# Answer')
                st.write(generation_chain.invoke(question))


if __name__ == '__main__':
    main()