# whoops-explorer / app.py
# yonatanbitton's picture
# first commit
# b2c3e0e
import random
import gradio as gr
from datasets import load_dataset
import os
# Token forwarded to `load_dataset`; None when the environment variable is unset.
auth_token = os.environ.get("auth_token")
# NOTE(review): recent `datasets` releases appear to rename `use_auth_token`
# to `token` — confirm against the pinned library version before changing it.
whoops = load_dataset("nlphuji/whoops", use_auth_token=auth_token)['test']
print("Loaded WMTIS, first example:")
print(whoops[0])
dataset_size = len(whoops)
print(f"all dataset size: {dataset_size}")

# Dataset column names, kept as constants so the rest of the file does not
# repeat the raw strings.
IMAGE = 'image'
IMAGE_DESIGNER = 'image_designer'
DESIGNER_EXPLANATION = 'designer_explanation'
CROWD_CAPTIONS = 'crowd_captions'
CROWD_EXPLANATIONS = 'crowd_explanations'
CROWD_UNDERSPECIFIED_CAPTIONS = 'crowd_underspecified_captions'
SELECTED_CAPTION = 'selected_caption'
COMMONSENSE_CATEGORY = 'commonsense_category'
QA = 'question_answering_pairs'
IMAGE_ID = 'image_id'

# Columns rendered above the accordion vs. inside it. The question/answer
# column is deliberately left out of the displayed columns.
left_side_columns = [IMAGE]
right_side_columns = [
    name for name in whoops.features
    if name not in left_side_columns and name != QA
]
# Columns whose value is a list and is displayed as a numbered list.
enumerate_cols = [CROWD_CAPTIONS, CROWD_EXPLANATIONS, CROWD_UNDERSPECIFIED_CAPTIONS]

# Emoji suffix appended to each column's textbox label (keyed by column name).
# NOTE(review): these emoji were restored from mis-decoded UTF-8 text; the
# memo (📝) and magnifying-glass (🔍) glyphs each had a byte missing and were
# inferred from context — confirm against the intended labels.
emoji_to_label = {
    IMAGE_DESIGNER: '🎨, 🧑‍🎨, 💻',
    DESIGNER_EXPLANATION: '💡, 🤔, 🧑‍🎨',
    CROWD_CAPTIONS: '👥, 💬, 📝',
    CROWD_EXPLANATIONS: '👥, 💡, 🤔',
    CROWD_UNDERSPECIFIED_CAPTIONS: '👥, 💬, 👎',
    QA: '❓, 🤔, 💡',
    IMAGE_ID: '🔍, 📄, 💾',
    COMMONSENSE_CATEGORY: '🤔, 📚, 💡',
    SELECTED_CAPTION: '📝, 👌, 💬',
}
# Every image is resized to this (width, height) before being displayed.
target_size = (1024, 1024)
def get_instance_values(example):
    """Return the display value of each shown column for one dataset example.

    Values are ordered as ``left_side_columns`` followed by
    ``right_side_columns``. Columns listed in ``enumerate_cols`` are turned
    into a numbered multi-line string, the ``QA`` column is rendered as
    numbered "Q: ... A: ..." lines built from the first two items of each
    pair, and every other column is passed through unchanged.
    """

    def render(column):
        raw = example[column]
        if column in enumerate_cols:
            return list_to_string(raw)
        if column == QA:
            return list_to_string([f"Q: {pair[0]} A: {pair[1]}" for pair in raw])
        return raw

    return [render(column) for column in [*left_side_columns, *right_side_columns]]
def list_to_string(lst):
    """Format the items of *lst* as a 1-based numbered list, one item per line."""
    numbered_lines = (
        f"{position}. {entry}" for position, entry in enumerate(lst, start=1)
    )
    return '\n'.join(numbered_lines)
def _field_label(key):
    """Build a textbox label from a column name plus its emoji suffix, if any."""
    label = key.capitalize().replace("_", " ")
    # Displayed columns are discovered from the dataset at runtime, so a column
    # may have no entry in the static emoji table; fall back to the plain label
    # instead of raising KeyError.
    emoji = emoji_to_label.get(key)
    return f"{label} {emoji}" if emoji else label


def plot_image(index):
    """Create the Gradio components that display one sampled example.

    Must be called inside an open Gradio layout context: the components are
    created for their side effect and nothing is returned. The image is shown
    resized to ``target_size`` and labelled with the example's commonsense
    category; the remaining columns go into a collapsed accordion as
    textboxes.

    Args:
        index: Position of the example within ``whoops_sample``.
    """
    # Fetch the row once and reuse it rather than indexing the dataset again
    # for the image and for the category.
    example = whoops_sample[index]
    instance_values = get_instance_values(example)

    split_at = len(left_side_columns)
    # Internal invariant: exactly one value per displayed column.
    assert len(instance_values) == split_at + len(right_side_columns)

    for key, value in zip(left_side_columns, instance_values[:split_at]):
        if key == IMAGE:
            gr.Image(
                value=example[IMAGE].resize(target_size),
                label=example[COMMONSENSE_CATEGORY],
            )
        else:
            gr.Textbox(value=value, label=_field_label(key))

    with gr.Accordion("Click for details", open=False):
        for key, value in zip(right_side_columns, instance_values[split_at:]):
            gr.Textbox(value=value, label=_field_label(key))
# Grid layout of the explorer page.
columns_number = 3
# Fixed number of rows; dataset_size // columns_number would show the whole split.
rows_number = 25
# Never request more examples than the split holds, so `select` is not given
# out-of-range indices when the dataset is smaller than the grid.
sample_size = min(columns_number * rows_number, dataset_size)
# Random subset displayed on the page (reshuffled on every start).
whoops_sample = whoops.shuffle().select(range(sample_size))

with gr.Blocks() as demo:
    gr.Markdown("# WHOOPS! Dataset Explorer")
    # One gr.Row per group of `columns_number` examples; the last row is
    # shorter if the sample does not fill the grid.
    for row_start in range(0, sample_size, columns_number):
        with gr.Row():
            row_end = min(row_start + columns_number, sample_size)
            for index in range(row_start, row_end):
                with gr.Column():
                    plot_image(index)
demo.launch()