mmathys committed on
Commit
a293aa3
1 Parent(s): a48afe6

added mock top failure clusters.

Browse files
Files changed (3) hide show
  1. app.py +24 -2
  2. data.csv +6 -6
  3. flagged/log.csv +22 -0
app.py CHANGED
@@ -4,6 +4,7 @@ import gradio as gr
4
 
5
  df = pd.read_csv("./data.csv")
6
 
 
7
  def md_builder(model, dataset, displayed_metrics):
8
  row = df[df["friendly_name"] == model]
9
  str = (
@@ -29,6 +30,10 @@ def md_builder(model, dataset, displayed_metrics):
29
  if "Fairness" in displayed_metrics:
30
  str += f"\nFairness: `{0}`"
31
 
 
 
 
 
32
  str += "\n<div style='text-align: right'>⛶ Expand safety card</div>"
33
 
34
  return str
@@ -49,7 +54,20 @@ iface = gr.Interface(
49
  label="Dataset",
50
  info="Select the sampling dataset to use for testing.",
51
  ),
52
- gr.CheckboxGroup(["Performance", "Accuracy", "Precision", "Recall", "Robustness", "Fairness"], value=["Accuracy", "Robustness"], label="Metrics", info="Select displayed metrics."),
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  # gr.Radio(["park", "zoo", "road"], label="Location", info="Where did they go?"),
54
  # gr.Dropdown(
55
  # ["ran", "swam", "ate", "slept"], value=["swam", "slept"], multiselect=True, label="Activity", info="Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed auctor, nisl eget ultricies aliquam, nunc nisl aliquet nunc, eget aliquam nisl nunc vel nisl."
@@ -58,7 +76,11 @@ iface = gr.Interface(
58
  ],
59
  "markdown",
60
  examples=[
61
- ["ViT", "marmal88/skin_cancer", ["Accuracy", "Robustness"]],
 
 
 
 
62
  ],
63
  )
64
 
 
4
 
5
  df = pd.read_csv("./data.csv")
6
 
7
+
8
  def md_builder(model, dataset, displayed_metrics):
9
  row = df[df["friendly_name"] == model]
10
  str = (
 
30
  if "Fairness" in displayed_metrics:
31
  str += f"\nFairness: `{0}`"
32
 
33
+ if "Failure Clusters" in displayed_metrics:
34
+ cl_count = row['cluster_count'].values[0]
35
+ str += f"\n<details><summary>Top failures: <code>{row['top_failure_cluster'].values[0]}</code> (+{cl_count - 1} others)</summary>(details for all {cl_count} clusters)</details>"
36
+
37
  str += "\n<div style='text-align: right'>⛶ Expand safety card</div>"
38
 
39
  return str
 
54
  label="Dataset",
55
  info="Select the sampling dataset to use for testing.",
56
  ),
57
+ gr.CheckboxGroup(
58
+ [
59
+ "Performance",
60
+ "Accuracy",
61
+ "Precision",
62
+ "Recall",
63
+ "Robustness",
64
+ "Fairness",
65
+ "Failure Clusters",
66
+ ],
67
+ value=["Accuracy", "Robustness", "Fairness", "Failure Clusters"],
68
+ label="Metrics",
69
+ info="Select displayed metrics.",
70
+ ),
71
  # gr.Radio(["park", "zoo", "road"], label="Location", info="Where did they go?"),
72
  # gr.Dropdown(
73
  # ["ran", "swam", "ate", "slept"], value=["swam", "slept"], multiselect=True, label="Activity", info="Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed auctor, nisl eget ultricies aliquam, nunc nisl aliquet nunc, eget aliquam nisl nunc vel nisl."
 
76
  ],
77
  "markdown",
78
  examples=[
79
+ [
80
+ "ViT",
81
+ "marmal88/skin_cancer",
82
+ ["Accuracy", "Robustness", "Fairness", "Failure Clusters"],
83
+ ],
84
  ],
85
  )
86
 
data.csv CHANGED
@@ -1,6 +1,6 @@
1
- id,loss,accuracy,f1_macro,f1_micro,f1_weighted,precision_macro,precision_micro,precision_weighted,recall_macro,recall_micro,recall_weighted,friendly_name,robustness,performance
2
- #50807121081,0.0514,0.9867,0.9839,0.9867,0.9867,0.9845,0.9867,0.9873,0.9841,0.9867,0.9867,Swin Transformer (small),24,12
3
- #50807121082,0.0341,0.9933,0.9920,0.9933,0.9933,0.9922,0.9933,0.9935,0.9919,0.9933,0.9933,ViT,27,13
4
- #50807121083,0.9992,0.5067,0.3474,0.5067,0.3968,0.6261,0.5067,0.5996,0.4095,0.5067,0.5067,ResNet,3,38
5
- #50807121084,0.0523,0.9800,0.9805,0.9800,0.9800,0.9857,0.9800,0.9809,0.9760,0.9800,0.9800,Swin Transformer (large),19,12
6
- #50807121085,0.0393,0.9733,0.9707,0.9733,0.9732,0.9739,0.9733,0.9734,0.9679,0.9733,0.9733,BEiT,19,12
 
1
+ id,loss,accuracy,f1_macro,f1_micro,f1_weighted,precision_macro,precision_micro,precision_weighted,recall_macro,recall_micro,recall_weighted,friendly_name,robustness,performance,top_failure_cluster,cluster_count
2
+ #50807121081,0.0514,0.9867,0.9839,0.9867,0.9867,0.9845,0.9867,0.9873,0.9841,0.9867,0.9867,Swin Transformer (small),24,12,Gaussian Blur,6
3
+ #50807121082,0.0341,0.9933,0.9920,0.9933,0.9933,0.9922,0.9933,0.9935,0.9919,0.9933,0.9933,ViT,27,13,Brightness,8
4
+ #50807121083,0.9992,0.5067,0.3474,0.5067,0.3968,0.6261,0.5067,0.5996,0.4095,0.5067,0.5067,ResNet,3,38,Brightness,2
5
+ #50807121084,0.0523,0.9800,0.9805,0.9800,0.9800,0.9857,0.9800,0.9809,0.9760,0.9800,0.9800,Swin Transformer (large),19,12,Brightness,5
6
+ #50807121085,0.0393,0.9733,0.9707,0.9733,0.9732,0.9739,0.9733,0.9734,0.9679,0.9733,0.9733,BEiT,19,12,Dark Spots,7
flagged/log.csv ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,Dataset,Metrics,output,flag,username,timestamp
2
+ ViT,marmal88/skin_cancer,"['Accuracy', 'Robustness', 'Fairness', 'Failure Clusters']","<h2><span style='font-size: 16px;'>🚧 Performance and safety of <code style='font-weight: 400'>ViT</code></span></h2>
3
+ <p>On dataset <code>marmal88/skin_cancer</code></p>
4
+ <p>Accuracy: <code>0.9933</code><br>
5
+ Robustness: <code>73</code><br>
6
+ Fairness: <code>0</code></p>
7
+ <details><summary>Top failures: <code>`Brightness` (+7 others)</code></summary>hi</details>
8
+ <div style='text-align: right'>⛶ Expand safety card</div>",,,2023-04-25 14:19:45.974654
9
+ ViT,marmal88/skin_cancer,"['Accuracy', 'Robustness', 'Fairness', 'Failure Clusters']","<h2><span style='font-size: 16px;'>🚧 Performance and safety of <code style='font-weight: 400'>ViT</code></span></h2>
10
+ <p>On dataset <code>marmal88/skin_cancer</code></p>
11
+ <p>Accuracy: <code>0.9933</code><br>
12
+ Robustness: <code>73</code><br>
13
+ Fairness: <code>0</code></p>
14
+ <details><summary>Top failures: <code>`Brightness` (+7 others)</code></summary>hi</details>
15
+ <div style='text-align: right'>⛶ Expand safety card</div>",,,2023-04-25 14:19:49.014781
16
+ ViT,marmal88/skin_cancer,"['Accuracy', 'Robustness', 'Fairness', 'Failure Clusters']","<h2><span style='font-size: 16px;'>🚧 Performance and safety of <code style='font-weight: 400'>ViT</code></span></h2>
17
+ <p>On dataset <code>marmal88/skin_cancer</code></p>
18
+ <p>Accuracy: <code>0.9933</code><br>
19
+ Robustness: <code>73</code><br>
20
+ Fairness: <code>0</code></p>
21
+ <details><summary>Top failures: `Brightness` (+7 others)</summary>(demo content)</details>
22
+ <div style='text-align: right'>⛶ Expand safety card</div>",,,2023-04-25 14:40:36.839513