Spaces:
Runtime error
Runtime error
Added mock top failure clusters.
Browse files
- app.py +24 -2
- data.csv +6 -6
- flagged/log.csv +22 -0
app.py
CHANGED
@@ -4,6 +4,7 @@ import gradio as gr
|
|
4 |
|
5 |
df = pd.read_csv("./data.csv")
|
6 |
|
|
|
7 |
def md_builder(model, dataset, displayed_metrics):
|
8 |
row = df[df["friendly_name"] == model]
|
9 |
str = (
|
@@ -29,6 +30,10 @@ def md_builder(model, dataset, displayed_metrics):
|
|
29 |
if "Fairness" in displayed_metrics:
|
30 |
str += f"\nFairness: `{0}`"
|
31 |
|
|
|
|
|
|
|
|
|
32 |
str += "\n<div style='text-align: right'>⛶ Expand safety card</div>"
|
33 |
|
34 |
return str
|
@@ -49,7 +54,20 @@ iface = gr.Interface(
|
|
49 |
label="Dataset",
|
50 |
info="Select the sampling dataset to use for testing.",
|
51 |
),
|
52 |
-
gr.CheckboxGroup(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
# gr.Radio(["park", "zoo", "road"], label="Location", info="Where did they go?"),
|
54 |
# gr.Dropdown(
|
55 |
# ["ran", "swam", "ate", "slept"], value=["swam", "slept"], multiselect=True, label="Activity", info="Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed auctor, nisl eget ultricies aliquam, nunc nisl aliquet nunc, eget aliquam nisl nunc vel nisl."
|
@@ -58,7 +76,11 @@ iface = gr.Interface(
|
|
58 |
],
|
59 |
"markdown",
|
60 |
examples=[
|
61 |
-
[
|
|
|
|
|
|
|
|
|
62 |
],
|
63 |
)
|
64 |
|
|
|
4 |
|
5 |
df = pd.read_csv("./data.csv")
|
6 |
|
7 |
+
|
8 |
def md_builder(model, dataset, displayed_metrics):
|
9 |
row = df[df["friendly_name"] == model]
|
10 |
str = (
|
|
|
30 |
if "Fairness" in displayed_metrics:
|
31 |
str += f"\nFairness: `{0}`"
|
32 |
|
33 |
+
if "Failure Clusters" in displayed_metrics:
|
34 |
+
cl_count = row['cluster_count'].values[0]
|
35 |
+
str += f"\n<details><summary>Top failures: <code>{row['top_failure_cluster'].values[0]}</code> (+{cl_count - 1} others)</summary>(details for all {cl_count} clusters)</details>"
|
36 |
+
|
37 |
str += "\n<div style='text-align: right'>⛶ Expand safety card</div>"
|
38 |
|
39 |
return str
|
|
|
54 |
label="Dataset",
|
55 |
info="Select the sampling dataset to use for testing.",
|
56 |
),
|
57 |
+
gr.CheckboxGroup(
|
58 |
+
[
|
59 |
+
"Performance",
|
60 |
+
"Accuracy",
|
61 |
+
"Precision",
|
62 |
+
"Recall",
|
63 |
+
"Robustness",
|
64 |
+
"Fairness",
|
65 |
+
"Failure Clusters",
|
66 |
+
],
|
67 |
+
value=["Accuracy", "Robustness", "Fairness", "Failure Clusters"],
|
68 |
+
label="Metrics",
|
69 |
+
info="Select displayed metrics.",
|
70 |
+
),
|
71 |
# gr.Radio(["park", "zoo", "road"], label="Location", info="Where did they go?"),
|
72 |
# gr.Dropdown(
|
73 |
# ["ran", "swam", "ate", "slept"], value=["swam", "slept"], multiselect=True, label="Activity", info="Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed auctor, nisl eget ultricies aliquam, nunc nisl aliquet nunc, eget aliquam nisl nunc vel nisl."
|
|
|
76 |
],
|
77 |
"markdown",
|
78 |
examples=[
|
79 |
+
[
|
80 |
+
"ViT",
|
81 |
+
"marmal88/skin_cancer",
|
82 |
+
["Accuracy", "Robustness", "Fairness", "Failure Clusters"],
|
83 |
+
],
|
84 |
],
|
85 |
)
|
86 |
|
data.csv
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
-
id,loss,accuracy,f1_macro,f1_micro,f1_weighted,precision_macro,precision_micro,precision_weighted,recall_macro,recall_micro,recall_weighted,friendly_name,robustness,performance
|
2 |
-
#50807121081,0.0514,0.9867,0.9839,0.9867,0.9867,0.9845,0.9867,0.9873,0.9841,0.9867,0.9867,Swin Transformer (small),24,12
|
3 |
-
#50807121082,0.0341,0.9933,0.9920,0.9933,0.9933,0.9922,0.9933,0.9935,0.9919,0.9933,0.9933,ViT,27,13
|
4 |
-
#50807121083,0.9992,0.5067,0.3474,0.5067,0.3968,0.6261,0.5067,0.5996,0.4095,0.5067,0.5067,ResNet,3,38
|
5 |
-
#50807121084,0.0523,0.9800,0.9805,0.9800,0.9800,0.9857,0.9800,0.9809,0.9760,0.9800,0.9800,Swin Transformer (large),19,12
|
6 |
-
#50807121085,0.0393,0.9733,0.9707,0.9733,0.9732,0.9739,0.9733,0.9734,0.9679,0.9733,0.9733,BEiT,19,12
|
|
|
1 |
+
id,loss,accuracy,f1_macro,f1_micro,f1_weighted,precision_macro,precision_micro,precision_weighted,recall_macro,recall_micro,recall_weighted,friendly_name,robustness,performance,top_failure_cluster,cluster_count
|
2 |
+
#50807121081,0.0514,0.9867,0.9839,0.9867,0.9867,0.9845,0.9867,0.9873,0.9841,0.9867,0.9867,Swin Transformer (small),24,12,Gaussian Blur,6
|
3 |
+
#50807121082,0.0341,0.9933,0.9920,0.9933,0.9933,0.9922,0.9933,0.9935,0.9919,0.9933,0.9933,ViT,27,13,Brightness,8
|
4 |
+
#50807121083,0.9992,0.5067,0.3474,0.5067,0.3968,0.6261,0.5067,0.5996,0.4095,0.5067,0.5067,ResNet,3,38,Brightness,2
|
5 |
+
#50807121084,0.0523,0.9800,0.9805,0.9800,0.9800,0.9857,0.9800,0.9809,0.9760,0.9800,0.9800,Swin Transformer (large),19,12,Brightness,5
|
6 |
+
#50807121085,0.0393,0.9733,0.9707,0.9733,0.9732,0.9739,0.9733,0.9734,0.9679,0.9733,0.9733,BEiT,19,12,Dark Spots,7
|
flagged/log.csv
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Model,Dataset,Metrics,output,flag,username,timestamp
|
2 |
+
ViT,marmal88/skin_cancer,"['Accuracy', 'Robustness', 'Fairness', 'Failure Clusters']","<h2><span style='font-size: 16px;'>🚧 Performance and safety of <code style='font-weight: 400'>ViT</code></span></h2>
|
3 |
+
<p>On dataset <code>marmal88/skin_cancer</code></p>
|
4 |
+
<p>Accuracy: <code>0.9933</code><br>
|
5 |
+
Robustness: <code>73</code><br>
|
6 |
+
Fairness: <code>0</code></p>
|
7 |
+
<details><summary>Top failures: <code>`Brightness` (+7 others)</code></summary>hi</details>
|
8 |
+
<div style='text-align: right'>⛶ Expand safety card</div>",,,2023-04-25 14:19:45.974654
|
9 |
+
ViT,marmal88/skin_cancer,"['Accuracy', 'Robustness', 'Fairness', 'Failure Clusters']","<h2><span style='font-size: 16px;'>🚧 Performance and safety of <code style='font-weight: 400'>ViT</code></span></h2>
|
10 |
+
<p>On dataset <code>marmal88/skin_cancer</code></p>
|
11 |
+
<p>Accuracy: <code>0.9933</code><br>
|
12 |
+
Robustness: <code>73</code><br>
|
13 |
+
Fairness: <code>0</code></p>
|
14 |
+
<details><summary>Top failures: <code>`Brightness` (+7 others)</code></summary>hi</details>
|
15 |
+
<div style='text-align: right'>⛶ Expand safety card</div>",,,2023-04-25 14:19:49.014781
|
16 |
+
ViT,marmal88/skin_cancer,"['Accuracy', 'Robustness', 'Fairness', 'Failure Clusters']","<h2><span style='font-size: 16px;'>🚧 Performance and safety of <code style='font-weight: 400'>ViT</code></span></h2>
|
17 |
+
<p>On dataset <code>marmal88/skin_cancer</code></p>
|
18 |
+
<p>Accuracy: <code>0.9933</code><br>
|
19 |
+
Robustness: <code>73</code><br>
|
20 |
+
Fairness: <code>0</code></p>
|
21 |
+
<details><summary>Top failures: `Brightness` (+7 others)</summary>(demo content)</details>
|
22 |
+
<div style='text-align: right'>⛶ Expand safety card</div>",,,2023-04-25 14:40:36.839513
|