File size: 4,185 Bytes
73f994d c2c48ba 73f994d 78bc38f beb7716 78bc38f 705accd 78bc38f 705accd d84f2d5 229cfd1 c2c48ba 1d92ef6 73f994d 6d2ea25 73f994d 6d2ea25 73f994d 81d7443 73f994d 81d7443 73f994d 81d7443 73f994d face539 ab57a2b 0b29fc0 face539 73f994d cb5d97e 73f994d 81d7443 9fdd6f5 73f994d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
import pandas as pd
import gradio as gr
from datasets import load_dataset
from langchain.document_loaders import DataFrameLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain_mistralai.chat_models import ChatMistralAI
# One row per insurer: (display name shown in the UI, dataset identifier that
# is later passed to `load_dataset`). The two public lists below are derived
# from this single table so they stay index-aligned.
_INSURER_DATASETS = (
    ("Caisse d'Epargne", "zelros/pj-ce"),
    ("Direct Assurance", "zelros/pj-da"),
    ("Groupama", "zelros/pj-groupama"),
    ("Société Générale", "zelros/pj-sg"),
    ("La Banque Postale", "zelros/pj-lbp"),
    ("MAIF", "zelros/pj-maif"),
    ("Crédit Agricole", "zelros/pj-ca"),
)

dataset_names = [dataset_id for _, dataset_id in _INSURER_DATASETS]
insurers = [label for label, _ in _INSURER_DATASETS]

# Maps an insurer display name to the vector store built for it; populated
# by the module-level loop over `dataset_names`.
db_dict = {}
def llm_response(question, insurer1, insurer2, gpt):
    """Answer one question against the document indexes of two insurers.

    Args:
        question: Question text sent as the ``"query"`` to both chains.
        insurer1: Key into ``db_dict`` selecting the first insurer's index.
        insurer2: Key into ``db_dict`` selecting the second insurer's index.
        gpt: Model identifier. ``"mistral-large-latest"`` selects
            ``ChatMistralAI``; any other value is passed to ``ChatOpenAI``.

    Returns:
        A tuple ``(answer1, answer2)`` holding the ``'result'`` entry of each
        retrieval chain's output, in the same order as the insurers.

    Raises:
        gr.Error: If the question is blank, or if either insurer has no
            entry in ``db_dict`` (for example when a dropdown was cleared).
    """
    # Fail early with a message the UI can display, instead of sending an
    # empty query to the LLM or surfacing a bare KeyError.
    if not question or not question.strip():
        raise gr.Error("Please enter a question.")
    for insurer in (insurer1, insurer2):
        if insurer not in db_dict:
            raise gr.Error(f"No documents available for insurer: {insurer!r}")

    def build_chain(insurer):
        # Each chain gets its own LLM client, as before.
        if gpt == "mistral-large-latest":
            llm = ChatMistralAI(model=gpt)
        else:
            llm = ChatOpenAI(model_name=gpt)
        # Retrieve the 8 best-matching chunks for the question.
        retriever = db_dict[insurer].as_retriever(search_kwargs={'k': 8})
        return RetrievalQA.from_chain_type(llm, retriever=retriever)

    # Build both chains before running either, so a construction failure
    # happens before any query is issued.
    chain1 = build_chain(insurer1)
    chain2 = build_chain(insurer2)

    # `invoke` replaces the deprecated direct call `chain({...})`.
    answer1 = chain1.invoke({"query": question})['result']
    answer2 = chain2.invoke({"query": question})['result']
    return answer1, answer2
# Sample questions offered by the interface. Each example row supplies a
# value only for the first input (the question); the remaining three inputs
# (insurer 1, insurer 2, LLM) are given None.
_EXAMPLE_QUESTIONS = (
    "Qui contacter en cas besoin ?",
    "Les problèmes de divorce sont-ils couverts ?",
    "En cas violences conjugales, puis-je être assisté ?",
    "Les problèmes d'usurpation d'identité sont-ils couverts ?",
    "Quel est le montant maximum couvert pour un litige ?",
    "Quels frais sont pris en charges, et quels frais ne sont pas pris en charge ?",
    "En tant que membre d'une association, suis-je couvert ?",
    "J'ai un litige concernant un brevet et un sujet de propriété intellectuelle, suis-je couvert ?",
    "Quels sont les moments où le contrat peut être résilié ?",
)

examples = [[sample, None, None, None] for sample in _EXAMPLE_QUESTIONS]
# Build one FAISS vector store per insurer and register it in `db_dict`
# under the insurer's display name. This runs at import time and loads each
# dataset and embeds its text.

# The two lists are consumed pairwise; a length mismatch would otherwise
# associate documents with the wrong insurer or drop entries silently.
if len(dataset_names) != len(insurers):
    raise ValueError(
        f"dataset_names ({len(dataset_names)}) and insurers "
        f"({len(insurers)}) must have the same length"
    )

# The splitter and embedding client do not depend on the dataset, so they
# are created once instead of once per iteration.
text_splitter = CharacterTextSplitter(chunk_size=5000, chunk_overlap=0)
embeddings = OpenAIEmbeddings()

for name, insurer in zip(dataset_names, insurers):
    dataset = load_dataset(name)
    df = dataset['train'].to_pandas()
    # NOTE(review): title and content are concatenated with no separator,
    # so the last word of the title is fused to the first word of the
    # content. Left unchanged because altering it would change the
    # embedded text; confirm whether a separator was intended.
    df['text'] = df["title"] + df["content"]
    documents = DataFrameLoader(df, 'text').load()
    texts = text_splitter.split_documents(documents)
    db_dict[insurer] = FAISS.from_documents(texts, embeddings)
# Gradio UI: one question box, two insurer dropdowns and a model dropdown
# feed `llm_response`; its two return values fill the two answer boxes.
demo = gr.Interface(
    llm_response,
    inputs=[
        gr.Textbox(
            label="Question",
            info="More examples below :)",
            value="Quel est le montant maximum couvert pour un litige ?",
        ),
        gr.Dropdown(
            choices=insurers,
            label="Insurer 1",
            value="Société Générale",
            info="More insurers available soon !",
        ),
        gr.Dropdown(
            choices=insurers,
            label="Insurer 2",
            value="La Banque Postale",
        ),
        gr.Dropdown(
            choices=["gpt-4o-mini-2024-07-18", "gpt-4o-2024-08-06", "mistral-large-latest"],
            label="LLM",
            value="mistral-large-latest",
            info="Compare gpt-4o-mini, gpt-4o, and mistral-large!",
        ),
    ],
    outputs=[
        gr.Textbox(label="Answer insurer 1"),
        gr.Textbox(label="Answer insurer 2"),
    ],
    title='Towards more accessible and inclusive insurances',
    description='### <center>Compare french legal protection insurances - Research project, non-binding information - Please refer to a professional for advice.',
    examples=examples,
    # Example outputs are not precomputed.
    cache_examples=False,
)

# Start the web app. The stray trailing "|" after this call was removed: it
# made the statement a syntax error.
demo.launch()