|
from datasets import load_from_disk, load_dataset |
|
import pandas as pd |
|
import os |
|
import gradio as gr |
|
|
|
# Load the dialogue corpus from the Hugging Face Hub. The dataset is expected
# to already carry an "embeddings" column, which is indexed with FAISS so that
# nearest-neighbour queries can be answered at request time.
ds_with_embeddings = load_dataset(
    "svjack/bloom-dialogue-generate-ds-en",
    split="train",
)
ds_with_embeddings.add_faiss_index(column="embeddings")

from sentence_transformers import SentenceTransformer

# Sentence encoder used to embed incoming queries.
# NOTE(review): presumably the stored embeddings were produced with this same
# LaBSE model -- confirm against the dataset card.
encoder = SentenceTransformer("sentence-transformers/LaBSE")
|
|
|
def retrieve_search_df(question="Which diet you want to eat?", top_k=10):
    """Look up the ``top_k`` dataset rows nearest to ``question``.

    The question is embedded with the module-level ``encoder`` and matched
    against the FAISS index built on the ``embeddings`` column of
    ``ds_with_embeddings``.

    Args:
        question: Query text to embed and search for.
        top_k: Number of neighbours requested from the index.

    Returns:
        A ``pandas.DataFrame`` with the columns ``question``,
        ``dialogue_text``, ``dialogue``, ``repo`` and ``scores``, where
        ``scores`` holds the per-row values reported by the index lookup.
    """
    wanted_columns = ["question", "dialogue_text", "dialogue", "repo", "scores"]

    query_vector = encoder.encode(question)
    index_scores, neighbours = ds_with_embeddings.get_nearest_examples(
        "embeddings",
        query_vector,
        k=top_k,
    )

    # Attach the index scores as an extra column, then keep only the columns
    # callers are meant to see.
    result = pd.DataFrame(neighbours).assign(scores=index_scores)
    return result[wanted_columns]
|
|
|
# Example rows offered in the UI; each row is [query text, number of results].
example_sample = [
    ["Which diet you want to eat?", 3],
    ["Do you like this film?", 5],
]
|
|
|
def demo_func(prefix, max_length):
    """Return the dialogues most similar to ``prefix`` as a JSON-ready dict.

    Args:
        prefix: Query text forwarded to ``retrieve_search_df``.
        max_length: Requested number of results. It is truncated to an int
            and raised to at least 3. Values that cannot be converted
            (``None`` from an emptied number field, NaN, infinity or
            non-numeric text) fall back to that minimum instead of raising.

    Returns:
        ``{"Dialogue Context": [...]}`` where the list is the ``dialogue``
        column of the search result.
    """
    min_top_k = 3
    try:
        top_k = max(int(max_length), min_top_k)
    except (TypeError, ValueError, OverflowError):
        # An empty or invalid number input should not crash the request.
        top_k = min_top_k

    # ``.values.tolist()`` already yields a plain list, so no separate type
    # check is needed before handing it to the JSON output.
    dialogues = retrieve_search_df(prefix, top_k)["dialogue"].values.tolist()
    return {"Dialogue Context": dialogues}
|
|
|
# Web UI: a text query and a result count go in, the matching dialogues come
# back as JSON produced by ``demo_func``.
demo = gr.Interface(
    fn=demo_func,
    title="Bloom English Daliy Dialogue Generator 🦅🌸 sample search demonstration",
    description='This _example_ was **drive** from <br/><b><h4>[https://github.com/svjack/Daliy-Dialogue](https://github.com/svjack/Daliy-Dialogue)</h4></b>\n',
    inputs=[
        gr.Text(label="Prefix"),
        gr.Number(label="Top K", value=10),
    ],
    outputs="json",
    # An empty example list is handed over as None rather than [].
    examples=example_sample or None,
    cache_examples=False,
)
|
|
|
# Start serving the interface. Host and port are passed explicitly as None,
# which leaves both choices to Gradio.
demo.launch(
    server_name=None,
    server_port=None,
)
|
|