|
import pandas as pd |
|
import gradio as gr |
|
from datasets import load_dataset |
|
from langchain.document_loaders import DataFrameLoader |
|
from langchain.text_splitter import CharacterTextSplitter |
|
from langchain.vectorstores import FAISS |
|
from langchain.embeddings import OpenAIEmbeddings |
|
from langchain.chains import RetrievalQA |
|
from langchain.chat_models import ChatOpenAI |
|
|
|
# Hugging Face dataset ids, one per insurer; kept in lockstep with
# `insurers` below (index i of each list refers to the same company).
dataset_names = [
    "zelros/pj-ca",        # Crédit Agricole
    "zelros/pj-ce",        # Caisse d'Epargne
    "zelros/pj-da",        # Direct Assurance
    "zelros/pj-groupama",  # Groupama
    "zelros/pj-sg",        # Société Générale
    "zelros/pj-lbp",       # La Banque Postale
]

# Display labels shown in the Gradio dropdowns.
insurers = [
    "Crédit Agricole",
    "Caisse d'Epargne",
    "Direct Assurance",
    "Groupama",
    "Société Générale",
    "La Banque Postale",
]

# insurer label -> FAISS vector store, filled by the loading loop below.
db_dict = {}
|
|
|
def llm_response(insurer1, insurer2, question):
    """Answer `question` against two insurers' vector stores side by side.

    Returns a pair of answer strings: (answer for insurer1, answer for
    insurer2), produced by independent RetrievalQA chains over the
    pre-built FAISS stores in `db_dict`.
    """
    def ask(llm, insurer):
        # Build a retrieval chain over this insurer's store, pulling the
        # 8 most similar chunks, and run the question through it.
        chain = RetrievalQA.from_chain_type(
            llm,
            retriever=db_dict[insurer].as_retriever(search_kwargs={'k': 8}),
        )
        return chain({"query": question})['result']

    return ask(llm1, insurer1), ask(llm2, insurer2)
|
|
|
# Sample questions for the Gradio examples panel; the two leading None
# entries leave the dropdown selections untouched.
_example_questions = [
    "Est-il possible de choisir son avocat ?",
    "Les problèmes de divorce sont-ils couverts ?",
    "Quels frais sont pris en charges, et quels frais ne sont pas pris en charge ?",
    "En tant que membre d'une association, suis-je couvert ?",
    "J'ai un litige concernant un brevet et un sujet de propriété intellectuelle, suis-je couvert ?",
    "Comment résilier le contrat ?",
]
examples = [[None, None, q] for q in _example_questions]
|
|
|
# Build one FAISS vector store per insurer from its Hugging Face dataset.
# Fix: the embeddings client is loop-invariant, so create it once instead
# of re-instantiating OpenAIEmbeddings() on every iteration; `zip` pairs
# each dataset id with its insurer label directly (the lists are kept in
# lockstep) instead of indexing via enumerate.
embeddings = OpenAIEmbeddings()

for name, insurer in zip(dataset_names, insurers):
    dataset = load_dataset(name)

    df = dataset['train'].to_pandas()
    # Concatenate title and body so each chunk keeps its heading context.
    df['text'] = df["title"] + df["content"]

    loader = DataFrameLoader(df, 'text')
    documents = loader.load()
    # Large chunks (5000 chars, no overlap) keep whole clauses together.
    text_splitter = CharacterTextSplitter(chunk_size=5000, chunk_overlap=0)
    texts = text_splitter.split_documents(documents)

    db = FAISS.from_documents(texts, embeddings)
    db_dict[insurer] = db
|
|
|
# Two independent GPT-4 chat models, one per compared insurer, so the two
# RetrievalQA chains in llm_response do not share a client object.
llm1, llm2 = ChatOpenAI(model_name="gpt-4"), ChatOpenAI(model_name="gpt-4")
|
|
|
# --- UI assembly -----------------------------------------------------------
# Name each component first, then wire them into the Interface; this keeps
# the gr.Interface call short and the widget configuration scannable.
insurer1_dropdown = gr.Dropdown(
    choices=insurers,
    label="Assureur 1",
    value="Société Générale",
    info="De nouveaux assureurs seront disponibles prochainement !",
)
insurer2_dropdown = gr.Dropdown(
    choices=insurers,
    label="Assureur 2",
    value="La Banque Postale",
)
question_box = gr.Textbox(
    label="Question",
    info="Quelques exemples ci-dessous :)",
)
answer1_box = gr.Textbox(label="Réponse assureur 1")
answer2_box = gr.Textbox(label="Réponse assureur 2")

demo = gr.Interface(
    llm_response,
    inputs=[insurer1_dropdown, insurer2_dropdown, question_box],
    outputs=[answer1_box, answer2_box],
    title='Pour une Assurance plus accessible et compréhensible',
    description='### <center>Comparez les assurances protection juridique',
    examples=examples,
    cache_examples=False,
)

demo.launch()