safety-card / app.py
mmathys's picture
small fixes
ddc0a5b
raw
history blame
3.93 kB
# %%
from jinja2 import Environment, FileSystemLoader
import pandas as pd
import gradio as gr
# Metrics table, loaded once when the module is imported. md_builder selects
# the row whose "friendly_name" column matches the chosen model.
df = pd.read_csv("./data.csv")
def parse_into_jinja_markdown(
    model_name,
    performance,
    accuracy,
    Precision,
    Recall,
    Robustness,
    Fairness,
    Failure_Clusters,
):
    """Render the ``mc_template.md`` template and return the resulting text.

    The template is loaded from the "." search path with autoescaping
    enabled. Each argument is handed to the template under the variable name
    shown in the mapping below; note that ``model_name`` is exposed as
    ``model_id`` and ``Failure_Clusters`` as ``Failure_Cluster``.
    """
    # Template variable name -> value supplied by the caller.
    template_vars = {
        "model_id": model_name,
        "accuracy": accuracy,
        "Precision": Precision,
        "Recall": Recall,
        "Robustness": Robustness,
        "Fairness": Fairness,
        "Performance": performance,
        "Failure_Cluster": Failure_Clusters,
    }
    jinja_env = Environment(loader=FileSystemLoader("."), autoescape=True)
    return jinja_env.get_template("mc_template.md").render(**template_vars)
def md_builder(model, dataset, displayed_metrics):
    """Build the rendered safety card for ``model``.

    Selects the rows of the module-level ``df`` whose ``friendly_name`` equals
    ``model``, formats one text snippet per selected metric from the first
    matching row, and passes the snippets to ``parse_into_jinja_markdown``.
    Metrics that are not selected are passed as empty strings.

    Args:
        model: Value compared against the ``friendly_name`` column of ``df``.
        dataset: Accepted for interface compatibility; not used by this
            function.
        displayed_metrics: Container of metric labels to include. Recognised
            labels are "Performance", "Accuracy", "Precision", "Recall",
            "Robustness", "Fairness" and "Failure Clusters".

    Returns:
        Whatever ``parse_into_jinja_markdown`` returns for the snippets.

    Raises:
        IndexError: If a selected metric needs a value from ``df`` but no row
            matches ``model``.
    """
    row = df[df["friendly_name"] == model]

    def cell(column):
        # Read lazily so that only columns of selected metrics are touched.
        values = row[column].values
        if len(values) == 0:
            # Same exception type as indexing the empty array would raise,
            # but with a message that names the actual problem.
            raise IndexError(f"no row with friendly_name {model!r}")
        return values[0]

    # Every snippet defaults to empty so unselected metrics render as nothing.
    perform_val = accuracy_val = precision_val = recall_val = ""
    robustness_val = fairness_val = fail_cluster = ""

    if "Performance" in displayed_metrics:
        perform_val = f"\nPerformance: `{cell('performance')}`"
    if "Accuracy" in displayed_metrics:
        accuracy_val = f"\nAccuracy: `{cell('accuracy')}`"
    if "Precision" in displayed_metrics:
        precision_val = f"\nPrecision: `{cell('precision_weighted')}`"
    if "Recall" in displayed_metrics:
        recall_val = f"\nRecall: `{cell('recall_weighted')}`"
    if "Robustness" in displayed_metrics:
        # The card shows 100 minus the stored "robustness" value.
        robustness_val = f"\nRobustness: `{100 - cell('robustness')}`"
    if "Fairness" in displayed_metrics:
        # NOTE(review): the value is hard-coded to 0 and not read from df;
        # confirm whether a fairness column should be used here.
        fairness_val = f"\nFairness: `{0}`"
    if "Failure Clusters" in displayed_metrics:
        cl_count = cell("cluster_count")
        fail_cluster = f"\nTop failures: {cell('top_failure_cluster')} (+{cl_count - 1} others) (details for all {cl_count} clusters)"

    return parse_into_jinja_markdown(
        model,
        perform_val,
        accuracy_val,
        precision_val,
        recall_val,
        robustness_val,
        fairness_val,
        fail_cluster,
    )
# Input widgets, listed in the positional order md_builder receives them:
# model, dataset, displayed metrics.
model_picker = gr.Dropdown(
    choices=list(df["friendly_name"]),
    value="ViT",
    label="Model",
    info="Select a model to use for testing.",
)
dataset_picker = gr.Dropdown(
    choices=["marmal88/skin_cancer"],
    value="marmal88/skin_cancer",
    label="Dataset",
    info="Select the sampling dataset to use for testing.",
)
metric_picker = gr.CheckboxGroup(
    choices=[
        "Performance",
        "Accuracy",
        "Precision",
        "Recall",
        "Robustness",
        "Fairness",
        "Failure Clusters",
    ],
    value=["Accuracy", "Robustness", "Fairness", "Failure Clusters"],
    label="Metrics",
    info="Select displayed metrics.",
)

# Wire the widgets to md_builder; its return value is shown as markdown.
iface = gr.Interface(
    fn=md_builder,
    inputs=[model_picker, dataset_picker, metric_picker],
    outputs="markdown",
    examples=[
        [
            "ViT",
            "marmal88/skin_cancer",
            ["Accuracy", "Robustness", "Fairness", "Failure Clusters"],
        ],
    ],
)
iface.launch()
# %%