"""Gradio demo comparing French legal-protection insurance contracts.

When run, the script loads one dataset per insurer, indexes its text in a
FAISS vector store (OpenAI embeddings), and launches a Gradio interface in
which the same question is answered from two insurers' documents side by side.
"""

import pandas as pd
import gradio as gr
from datasets import load_dataset
from langchain.document_loaders import DataFrameLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain_mistralai.chat_models import ChatMistralAI

# Parallel lists: dataset_names[i] holds the documents of insurers[i].
dataset_names = [
    "zelros/pj-ce",
    "zelros/pj-da",
    "zelros/pj-groupama",
    "zelros/pj-sg",
    "zelros/pj-lbp",
    "zelros/pj-maif",
    "zelros/pj-ca",
]
insurers = [
    "Caisse d'Epargne",
    "Direct Assurance",
    "Groupama",
    "Société Générale",
    "La Banque Postale",
    "MAIF",
    "Crédit Agricole",
]

# Insurer display name -> FAISS vector store, filled by the loop further down.
db_dict = {}

# The only model name routed to Mistral; every other name goes to OpenAI.
_MISTRAL_MODEL = "mistral-large-latest"
# Number of chunks the retriever hands to the LLM for each question.
_RETRIEVED_CHUNKS = 8
# Maximum chunk size passed to CharacterTextSplitter.
_CHUNK_SIZE = 5000


def _build_llm(gpt):
    """Return a chat model client for the model name *gpt*."""
    if gpt == _MISTRAL_MODEL:
        return ChatMistralAI(model=gpt)
    return ChatOpenAI(model_name=gpt)


def _build_qa_chain(insurer, gpt):
    """Return a RetrievalQA chain over *insurer*'s vector store using *gpt*."""
    retriever = db_dict[insurer].as_retriever(
        search_kwargs={"k": _RETRIEVED_CHUNKS}
    )
    return RetrievalQA.from_chain_type(_build_llm(gpt), retriever=retriever)


def llm_response(insurer1, insurer2, question, gpt):
    """Answer *question* from the documents of two insurers.

    Args:
        insurer1: Key of the first insurer in ``db_dict``.
        insurer2: Key of the second insurer in ``db_dict``.
        question: The question sent to both retrieval chains.
        gpt: Model name. ``"mistral-large-latest"`` selects ``ChatMistralAI``;
            any other value is passed to ``ChatOpenAI``.

    Returns:
        A ``(answer_for_insurer1, answer_for_insurer2)`` tuple.

    Raises:
        KeyError: If either insurer has no entry in ``db_dict``.
    """
    # Build both chains before querying so that an unknown insurer fails
    # before any LLM call is made.
    chains = [_build_qa_chain(insurer, gpt) for insurer in (insurer1, insurer2)]
    answer1, answer2 = (chain({"query": question})["result"] for chain in chains)
    return answer1, answer2


# Each row is [insurer 1, insurer 2, question]; the two insurer slots are None
# in every row, so the examples only provide a question.
examples = [
    [None, None, "Qui contacter en cas besoin ?"],
    [None, None, "Les problèmes de divorce sont-ils couverts ?"],
    [None, None, "En cas violences conjugales, puis-je être assisté ?"],
    [None, None, "Les problèmes d'usurpation d'identité sont-ils couverts ?"],
    [None, None, "Quel est le montant maximum couvert pour un litige ?"],
    [None, None, "Quels frais sont pris en charges, et quels frais ne sont pas pris en charge ?"],
    [None, None, "En tant que membre d'une association, suis-je couvert ?"],
    [None, None, "J'ai un litige concernant un brevet et un sujet de propriété intellectuelle, suis-je couvert ?"],
    [None, None, "Quels sont les moments où le contrat peut être résilié ?"],
]

if len(dataset_names) != len(insurers):
    raise ValueError("dataset_names and insurers must have the same length")

# The embedding client and the splitter do not depend on the dataset, so they
# are created once and shared by every index.
embeddings = OpenAIEmbeddings()
text_splitter = CharacterTextSplitter(chunk_size=_CHUNK_SIZE, chunk_overlap=0)

for insurer, name in zip(insurers, dataset_names):
    df = load_dataset(name)["train"].to_pandas()
    # NOTE(review): title and content are joined with no separator — confirm
    # this is intended before changing it, since it alters the indexed text.
    df["text"] = df["title"] + df["content"]
    documents = DataFrameLoader(df, "text").load()
    texts = text_splitter.split_documents(documents)
    db_dict[insurer] = FAISS.from_documents(texts, embeddings)

demo = gr.Interface(
    llm_response,
    inputs=[
        gr.Dropdown(
            choices=insurers,
            label="Insurer 1",
            value="Société Générale",
            info="More insurers available soon !",
        ),
        gr.Dropdown(
            choices=insurers,
            label="Insurer 2",
            value="La Banque Postale",
        ),
        gr.Textbox(
            label="Question",
            info="More examples below :)",
            value="Quel est le montant maximum couvert pour un litige ?",
        ),
        gr.Dropdown(
            choices=["gpt-3.5-turbo", "gpt-4-turbo-preview", "mistral-large-latest"],
            label="LLM",
            value="mistral-large-latest",
            info="Compare gpt-3, gpt-4, and mistral-large!",
        ),
    ],
    outputs=[
        gr.Textbox(label="Answer insurer 1"),
        gr.Textbox(label="Answer insurer 2"),
    ],
    title="Towards more accessible and inclusive insurances",
    description=(
        "### Compare french legal protection insurances"
        " - Research project, non-binding information"
        " - Please refer to a professional for advice."
    ),
    examples=examples,
    cache_examples=False,
)

demo.launch()