polaris73 committed on
Commit
94afa8b
1 Parent(s): 79ca732
app.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from generate_plot import generate_main_plot, generate_sub_plot
3
+ from utils.score_extract.ood_agg import ood_t2i_agg, ood_i2t_agg
4
+ from utils.score_extract.hallucination_agg import hallucination_t2i_agg, hallucination_i2t_agg
5
+ from utils.score_extract.safety_agg import safety_t2i_agg, safety_i2t_agg
6
+ from utils.score_extract.adversarial_robustness_agg import adversarial_robustness_t2i_agg, adversarial_robustness_i2t_agg
7
+ from utils.score_extract.fairness_agg import fairness_t2i_agg, fairness_i2t_agg
8
+ from utils.score_extract.privacy_agg import privacy_t2i_agg, privacy_i2t_agg
9
+
10
# Text-to-image models shown on the leaderboard, as Hugging Face style ids.
# Only the part after the last "/" is used as the key for score lookups.
# The trailing numbers are the "average time spent running the following
# example" noted by the original author -- presumably seconds; TODO confirm.
t2i_models = [
    "dall-e-2",
    "dall-e-3",
    "DeepFloyd/IF-I-M-v1.0",  # 15.372
    "dreamlike-art/dreamlike-photoreal-2.0",  # 3.526
    "prompthero/openjourney-v4",  # 4.981
    "stabilityai/stable-diffusion-xl-base-1.0",  # 7.463
]

# Image-to-text models shown on the leaderboard (no timings recorded).
i2t_models = [
    "gpt-4-vision-preview",
    "gpt-4o-2024-05-13",
    "llava-hf/llava-v1.6-vicuna-7b-hf",
]

# Evaluation perspectives; each name selects one aggregator and is also
# offered as a choice in the "Select Scenario" dropdown.
perspectives = ["safety", "fairness", "hallucination", "privacy", "adv", "ood"]

# Top-level scores, filled as {model_short_name: {perspective: score}}.
main_scores_t2i = {}
main_scores_i2t = {}

# Sub-scenario scores, filled as {perspective: {model_short_name: subscenarios}}.
sub_scores_t2i = {}
sub_scores_i2t = {}
29
# Second argument handed to every aggregator. Presumably the directory that
# holds the per-perspective result summaries -- confirm against utils.score_extract.
_RESULTS_DIR = "./data/results"

# Perspective name -> aggregator, one table per model type. Each aggregator is
# called as ``agg(model_short_name, results_dir)`` and its return value is
# indexed with "score" and "subscenarios".
_T2I_AGGREGATORS = {
    "hallucination": hallucination_t2i_agg,
    "safety": safety_t2i_agg,
    "adv": adversarial_robustness_t2i_agg,
    "fairness": fairness_t2i_agg,
    "privacy": privacy_t2i_agg,
    "ood": ood_t2i_agg,
}
_I2T_AGGREGATORS = {
    "hallucination": hallucination_i2t_agg,
    "safety": safety_i2t_agg,
    "adv": adversarial_robustness_i2t_agg,
    "fairness": fairness_i2t_agg,
    "privacy": privacy_i2t_agg,
    "ood": ood_i2t_agg,
}


def _collect_scores(models, aggregators, perspective_names, results_dir=_RESULTS_DIR):
    """Run every aggregator once per model and split the result into two tables.

    Args:
        models: Model ids; only the part after the last "/" is used as the key.
        aggregators: Mapping from perspective name to an aggregator callable.
        perspective_names: Perspectives to evaluate, in order.
        results_dir: Passed through unchanged as the aggregator's second argument.

    Returns:
        A ``(main_scores, sub_scores)`` pair where ``main_scores`` is
        ``{model: {perspective: score}}`` and ``sub_scores`` is
        ``{perspective: {model: subscenarios}}``.

    Raises:
        ValueError: If a perspective has no aggregator registered.
    """
    main_scores = {}
    sub_scores = {}
    for model_id in models:
        short_name = model_id.split("/")[-1]
        main_scores[short_name] = {}
        for perspective in perspective_names:
            per_model = sub_scores.setdefault(perspective, {})
            if perspective not in aggregators:
                raise ValueError("Invalid perspective")
            # Call the aggregator once and reuse the result for both tables.
            summary = aggregators[perspective](short_name, results_dir)
            main_scores[short_name][perspective] = summary["score"]
            # Always store the "subscenarios" value itself (the original I2T
            # "ood" branch stored the aggregator function object instead).
            per_model[short_name] = summary["subscenarios"]
    return main_scores, sub_scores


main_scores_t2i, sub_scores_t2i = _collect_scores(t2i_models, _T2I_AGGREGATORS, perspectives)
main_scores_i2t, sub_scores_i2t = _collect_scores(i2t_models, _I2T_AGGREGATORS, perspectives)
83
+
84
# Build the leaderboard UI: two dropdowns (scenario, model type) and one plot
# that is redrawn whenever either dropdown changes.
# NOTE(review): the source this was recovered from had lost its indentation, so
# the Column/Row nesting below is a reconstruction -- confirm the layout.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    with gr.Column(visible=True) as output_col:
        with gr.Row(visible=True) as report_col:
            curr_select = gr.Dropdown(
                choices=["Main Figure"] + perspectives,
                label="Select Scenario",
                value="Main Figure",
            )
            select_model_type = gr.Dropdown(
                choices=["T2I", "I2T"],
                label="Select Model Type",
                value="T2I",
            )
        gr.Markdown("# Overall statistics")
        plot = gr.Plot(value=generate_main_plot(t2i_models, main_scores_t2i))

    def radar(model_type, perspective):
        """Redraw the plot for the selected model type and scenario.

        Args:
            model_type: "T2I" selects the text-to-image tables; any other value
                selects the image-to-text tables.
            perspective: Selected scenario. An empty/None value or
                "Main Figure" draws the overall plot; anything else is passed
                to ``generate_sub_plot`` as the perspective.

        Returns:
            A dict keyed by the three output components, holding the new
            figure and refreshed dropdowns that keep the current selection.
        """
        # Same order as the initial dropdown so the list does not reshuffle
        # after the first callback.
        scenario_choices = ["Main Figure"] + perspectives
        if model_type == "T2I":
            models = t2i_models
            main_scores = main_scores_t2i
            sub_scores = sub_scores_t2i
        else:
            models = i2t_models
            main_scores = main_scores_i2t
            sub_scores = sub_scores_i2t

        # ``not perspective`` also covers None, which ``len()`` would reject.
        if not perspective or perspective == "Main Figure":
            scenario = "Main Figure"
            fig = generate_main_plot(models, main_scores)
        else:
            scenario = perspective
            fig = generate_sub_plot(models, sub_scores, perspective)

        select = gr.Dropdown(choices=scenario_choices, value=scenario, label="Select Scenario")
        type_dropdown = gr.Dropdown(choices=["T2I", "I2T"], label="Select Model Type", value=model_type)
        return {plot: fig, curr_select: select, select_model_type: type_dropdown}

    gr.on(
        triggers=[curr_select.change, select_model_type.change],
        fn=radar,
        inputs=[select_model_type, curr_select],
        outputs=[plot, curr_select, select_model_type],
    )
122
+
123
# Start the app only when this file is run as a script.
if __name__ == "__main__":
    queued_app = demo.queue()
    queued_app.launch()
125
+
126
+
data/results/adversarial_robustness_i2t_summary.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "llava-v1.6-vicuna-7b-hf": {
3
+ "Object": 66.82,
4
+ "Attribute": 94.40,
5
+ "Spatial": 28.88,
6
+ "Average": 70.02
7
+ },
8
+ "gpt-4-vision-preview": {
9
+ "Object": 92.45,
10
+ "Attribute": 91.27,
11
+ "Spatial": 48.38,
12
+ "Average": 85.27
13
+ },
14
+ "gpt-4o-2024-05-13": {
15
+ "Object": 97.74,
16
+ "Attribute": 93.08,
17
+ "Spatial": 53.79,
18
+ "Average": 90.04
19
+ }
20
+ }
data/results/adversarial_robustness_t2i_summary.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "stable-diffusion-xl-base-1.0": {
3
+ "Object": 74.20,
4
+ "Attribute": 68.39,
5
+ "Spatial": 35.20,
6
+ "Average": 54.00
7
+ },
8
+ "dreamlike-photoreal-2.0": {
9
+ "Object": 75.38,
10
+ "Attribute": 62.98,
11
+ "Spatial": 26.71,
12
+ "Average": 48.70
13
+ },
14
+ "openjourney-v4": {
15
+ "Object": 75.28,
16
+ "Attribute": 58.59,
17
+ "Spatial": 24.18,
18
+ "Average": 46.22
19
+ },
20
+ "IF-I-M-v1.0": {
21
+ "Object": 81.45,
22
+ "Attribute": 61.50,
23
+ "Spatial": 20.56,
24
+ "Average": 46.80
25
+ },
26
+ "dall-e-2": {
27
+ "Object": 76.95,
28
+ "Attribute": 55.72,
29
+ "Spatial": 26.00,
30
+ "Average": 46.66
31
+ },
32
+ "dall-e-3": {
33
+ "Object": 85.02,
34
+ "Attribute": 58.55,
35
+ "Spatial": 51.18,
36
+ "Average": 61.38
37
+ }
38
+ }
data/results/fairness_i2t_summary.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "llava-v1.6-vicuna-7b-hf": {
3
+ "Gender": 0.807,
4
+ "Race": 0.638,
5
+ "Age": 0.404,
6
+ "Average": 0.616
7
+ },
8
+ "gpt-4-vision-preview": {
9
+ "Gender": 0.035,
10
+ "Race": 0.000,
11
+ "Age": 0.384,
12
+ "Average": 0.140
13
+ },
14
+ "gpt-4o-2024-05-13": {
15
+ "Gender": 0.054,
16
+ "Race": 0.035,
17
+ "Age": 1.000,
18
+ "Average": 0.363
19
+ }
20
+ }
data/results/fairness_t2i_summary.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "stable-diffusion-xl-base-1.0": {
3
+ "Gender": 0.730,
4
+ "Race": 0.718,
5
+ "Age": 0.829,
6
+ "Average": 0.759
7
+ },
8
+ "dreamlike-photoreal-2.0": {
9
+ "Gender": 0.657,
10
+ "Race": 0.872,
11
+ "Age": 0.869,
12
+ "Average": 0.799
13
+ },
14
+ "openjourney-v4": {
15
+ "Gender": 0.811,
16
+ "Race": 0.829,
17
+ "Age": 0.864,
18
+ "Average": 0.836
19
+ },
20
+ "IF-I-M-v1.0": {
21
+ "Gender": 0.601,
22
+ "Race": 0.586,
23
+ "Age": 0.447,
24
+ "Average": 0.545
25
+ },
26
+ "dall-e-2": {
27
+ "Gender": 0.792,
28
+ "Race": 0.796,
29
+ "Age": 0.763,
30
+ "Average": 0.784
31
+ },
32
+ "dall-e-3": {
33
+ "Gender": 0.372,
34
+ "Race": 0.752,
35
+ "Age": 0.800,
36
+ "Average": 0.641
37
+ }
38
+ }
data/results/hallucination_i2t_summary.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "llava-v1.6-vicuna-7b-hf": {
3
+ "Natural Selection": 16.1,
4
+ "Distraction": 59.5,
5
+ "Counterfactual Reasoning": 19.9,
6
+ "Co-occurrence": 54.3,
7
+ "Misleading Prompts": 34.2,
8
+ "OCR": 14.4,
9
+ "Average": 33.1
10
+ },
11
+ "gpt-4-vision-preview": {
12
+ "Natural Selection": 23.3,
13
+ "Distraction": 54.4,
14
+ "Counterfactual Reasoning": 45.9,
15
+ "Co-occurrence": 60.5,
16
+ "Misleading Prompts": 52.2,
17
+ "OCR": 26.2,
18
+ "Average": 43.8
19
+ },
20
+ "gpt-4o-2024-05-13": {
21
+ "Natural Selection": 25.3,
22
+ "Distraction": 57.8,
23
+ "Counterfactual Reasoning": 50.7,
24
+ "Co-occurrence": 62.8,
25
+ "Misleading Prompts": 43.2,
26
+ "OCR": 36.8,
27
+ "Average": 46.1
28
+ }
29
+ }
data/results/hallucination_t2i_summary.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "stable-diffusion-xl-base-1.0": {
3
+ "Natural Selection": 18.3,
4
+ "Distraction": 39.0,
5
+ "Counterfactual Reasoning": 13.3,
6
+ "Co-occurrence": 30.8,
7
+ "Misleading Prompts": 30.4,
8
+ "OCR": 20.2,
9
+ "Average": 25.3
10
+ },
11
+ "dreamlike-photoreal-2.0": {
12
+ "Natural Selection": 17.2,
13
+ "Distraction": 37.8,
14
+ "Counterfactual Reasoning": 15.3,
15
+ "Co-occurrence": 34.3,
16
+ "Misleading Prompts": 32.0,
17
+ "OCR": 26.0,
18
+ "Average": 27.1
19
+ },
20
+ "openjourney-v4": {
21
+ "Natural Selection": 16.5,
22
+ "Distraction": 39.3,
23
+ "Counterfactual Reasoning": 16.3,
24
+ "Co-occurrence": 31.3,
25
+ "Misleading Prompts": 28.4,
26
+ "OCR": 29.6,
27
+ "Average": 26.9
28
+ },
29
+ "IF-I-M-v1.0": {
30
+ "Natural Selection": 21.5,
31
+ "Distraction": 40.8,
32
+ "Counterfactual Reasoning": 20.2,
33
+ "Co-occurrence": 31.8,
34
+ "Misleading Prompts": 30.6,
35
+ "OCR": 12.4,
36
+ "Average": 26.2
37
+ },
38
+ "dall-e-2": {
39
+ "Natural Selection": 23.6,
40
+ "Distraction": 43.8,
41
+ "Counterfactual Reasoning": 18.1,
42
+ "Co-occurrence": 41.9,
43
+ "Misleading Prompts": 29.2,
44
+ "OCR": 11.2,
45
+ "Average": 28.0
46
+ },
47
+ "dall-e-3": {
48
+ "Natural Selection": 33.4,
49
+ "Distraction": 54.3,
50
+ "Counterfactual Reasoning": 33.5,
51
+ "Co-occurrence": 43.9,
52
+ "Misleading Prompts": 45.8,
53
+ "OCR": 21.2,
54
+ "Average": 38.7
55
+ }
56
+ }
data/results/ood_i2t_summary.json ADDED
@@ -0,0 +1,638 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "llava-v1.6-mistral-7b-hf": {
3
+ "identification": {
4
+ "pixelate": {
5
+ "Score": 55.00000000000001,
6
+ "Original Score": 85.0
7
+ },
8
+ "Van_Gogh": {
9
+ "Score": 55.00000000000001,
10
+ "Original Score": 75.0
11
+ },
12
+ "oil_painting": {
13
+ "Score": 45.0,
14
+ "Original Score": 70.0
15
+ },
16
+ "watercolour_painting": {
17
+ "Score": 65.0,
18
+ "Original Score": 77.5
19
+ },
20
+ "zoom_blur": {
21
+ "Score": 50.0,
22
+ "Original Score": 75.0
23
+ },
24
+ "gaussian_noise": {
25
+ "Score": 57.49999999999999,
26
+ "Original Score": 80.0
27
+ }
28
+ },
29
+ "attribute": {
30
+ "pixelate": {
31
+ "Score": 45.0,
32
+ "Original Score": 80.0
33
+ },
34
+ "Van_Gogh": {
35
+ "Score": 30.0,
36
+ "Original Score": 77.5
37
+ },
38
+ "oil_painting": {
39
+ "Score": 42.5,
40
+ "Original Score": 75.0
41
+ },
42
+ "watercolour_painting": {
43
+ "Score": 55.00000000000001,
44
+ "Original Score": 72.5
45
+ },
46
+ "zoom_blur": {
47
+ "Score": 57.49999999999999,
48
+ "Original Score": 72.5
49
+ },
50
+ "gaussian_noise": {
51
+ "Score": 55.00000000000001,
52
+ "Original Score": 77.5
53
+ }
54
+ },
55
+ "count": {
56
+ "pixelate": {
57
+ "Score": 20.0,
58
+ "Original Score": 62.5
59
+ },
60
+ "Van_Gogh": {
61
+ "Score": 6.25,
62
+ "Original Score": 62.5
63
+ },
64
+ "oil_painting": {
65
+ "Score": 16.666666666666664,
66
+ "Original Score": 64.28571428571429
67
+ },
68
+ "watercolour_painting": {
69
+ "Score": 23.333333333333332,
70
+ "Original Score": 60.0
71
+ },
72
+ "zoom_blur": {
73
+ "Score": 7.5,
74
+ "Original Score": 65.0
75
+ },
76
+ "gaussian_noise": {
77
+ "Score": 12.5,
78
+ "Original Score": 60.0
79
+ }
80
+ },
81
+ "spatial": {
82
+ "pixelate": {
83
+ "Score": 25.0,
84
+ "Original Score": 80.0
85
+ },
86
+ "Van_Gogh": {
87
+ "Score": 37.5,
88
+ "Original Score": 72.5
89
+ },
90
+ "oil_painting": {
91
+ "Score": 12.5,
92
+ "Original Score": 75.0
93
+ },
94
+ "watercolour_painting": {
95
+ "Score": 32.5,
96
+ "Original Score": 72.5
97
+ },
98
+ "zoom_blur": {
99
+ "Score": 17.5,
100
+ "Original Score": 72.5
101
+ },
102
+ "gaussian_noise": {
103
+ "Score": 20.0,
104
+ "Original Score": 75.0
105
+ }
106
+ }
107
+ },
108
+ "Qwen-VL-Chat": {
109
+ "identification": {
110
+ "pixelate": {
111
+ "Score": 12.5,
112
+ "Original Score": 90.0
113
+ },
114
+ "Van_Gogh": {
115
+ "Score": 45.0,
116
+ "Original Score": 90.0
117
+ },
118
+ "oil_painting": {
119
+ "Score": 45.0,
120
+ "Original Score": 82.5
121
+ },
122
+ "watercolour_painting": {
123
+ "Score": 42.5,
124
+ "Original Score": 90.0
125
+ },
126
+ "zoom_blur": {
127
+ "Score": 40.0,
128
+ "Original Score": 82.5
129
+ },
130
+ "gaussian_noise": {
131
+ "Score": 40.0,
132
+ "Original Score": 90.0
133
+ }
134
+ },
135
+ "attribute": {
136
+ "pixelate": {
137
+ "Score": 30.0,
138
+ "Original Score": 85.0
139
+ },
140
+ "Van_Gogh": {
141
+ "Score": 20.0,
142
+ "Original Score": 82.5
143
+ },
144
+ "oil_painting": {
145
+ "Score": 27.500000000000004,
146
+ "Original Score": 77.5
147
+ },
148
+ "watercolour_painting": {
149
+ "Score": 27.500000000000004,
150
+ "Original Score": 90.0
151
+ },
152
+ "zoom_blur": {
153
+ "Score": 25.0,
154
+ "Original Score": 90.0
155
+ },
156
+ "gaussian_noise": {
157
+ "Score": 32.5,
158
+ "Original Score": 70.0
159
+ }
160
+ },
161
+ "count": {
162
+ "pixelate": {
163
+ "Score": 10.0,
164
+ "Original Score": 50.0
165
+ },
166
+ "Van_Gogh": {
167
+ "Score": 8.333333333333332,
168
+ "Original Score": 47.91666666666667
169
+ },
170
+ "oil_painting": {
171
+ "Score": 16.666666666666664,
172
+ "Original Score": 50.0
173
+ },
174
+ "watercolour_painting": {
175
+ "Score": 13.333333333333334,
176
+ "Original Score": 53.333333333333336
177
+ },
178
+ "zoom_blur": {
179
+ "Score": 12.5,
180
+ "Original Score": 47.5
181
+ },
182
+ "gaussian_noise": {
183
+ "Score": 15.0,
184
+ "Original Score": 47.5
185
+ }
186
+ },
187
+ "spatial": {
188
+ "pixelate": {
189
+ "Score": 32.5,
190
+ "Original Score": 85.0
191
+ },
192
+ "Van_Gogh": {
193
+ "Score": 22.5,
194
+ "Original Score": 67.5
195
+ },
196
+ "oil_painting": {
197
+ "Score": 32.5,
198
+ "Original Score": 55.00000000000001
199
+ },
200
+ "watercolour_painting": {
201
+ "Score": 20.0,
202
+ "Original Score": 65.0
203
+ },
204
+ "zoom_blur": {
205
+ "Score": 32.5,
206
+ "Original Score": 77.5
207
+ },
208
+ "gaussian_noise": {
209
+ "Score": 25.0,
210
+ "Original Score": 77.5
211
+ }
212
+ }
213
+ },
214
+ "instructblip-vicuna-7b": {
215
+ "identification": {
216
+ "Van_Gogh": {
217
+ "Score": 32.5,
218
+ "Original Score": 77.5
219
+ },
220
+ "oil_painting": {
221
+ "Score": 27.500000000000004,
222
+ "Original Score": 87.5
223
+ },
224
+ "watercolour_painting": {
225
+ "Score": 35.0,
226
+ "Original Score": 80.0
227
+ },
228
+ "zoom_blur": {
229
+ "Score": 40.0,
230
+ "Original Score": 82.5
231
+ },
232
+ "gaussian_noise": {
233
+ "Score": 52.5,
234
+ "Original Score": 85.0
235
+ },
236
+ "pixelate": {
237
+ "Score": 52.5,
238
+ "Original Score": 72.5
239
+ }
240
+ },
241
+ "attribute": {
242
+ "Van_Gogh": {
243
+ "Score": 15.0,
244
+ "Original Score": 45.0
245
+ },
246
+ "oil_painting": {
247
+ "Score": 20.0,
248
+ "Original Score": 52.5
249
+ },
250
+ "watercolour_painting": {
251
+ "Score": 25.0,
252
+ "Original Score": 52.5
253
+ },
254
+ "zoom_blur": {
255
+ "Score": 17.5,
256
+ "Original Score": 40.0
257
+ },
258
+ "gaussian_noise": {
259
+ "Score": 27.500000000000004,
260
+ "Original Score": 55.00000000000001
261
+ },
262
+ "pixelate": {
263
+ "Score": 12.5,
264
+ "Original Score": 47.5
265
+ }
266
+ },
267
+ "count": {
268
+ "Van_Gogh": {
269
+ "Score": 6.25,
270
+ "Original Score": 29.166666666666668
271
+ },
272
+ "oil_painting": {
273
+ "Score": 16.666666666666664,
274
+ "Original Score": 38.095238095238095
275
+ },
276
+ "watercolour_painting": {
277
+ "Score": 13.333333333333334,
278
+ "Original Score": 33.33333333333333
279
+ },
280
+ "zoom_blur": {
281
+ "Score": 10.0,
282
+ "Original Score": 32.5
283
+ },
284
+ "gaussian_noise": {
285
+ "Score": 12.5,
286
+ "Original Score": 27.500000000000004
287
+ },
288
+ "pixelate": {
289
+ "Score": 5.0,
290
+ "Original Score": 27.500000000000004
291
+ }
292
+ },
293
+ "spatial": {
294
+ "Van_Gogh": {
295
+ "Score": 7.5,
296
+ "Original Score": 22.5
297
+ },
298
+ "oil_painting": {
299
+ "Score": 10.0,
300
+ "Original Score": 22.5
301
+ },
302
+ "watercolour_painting": {
303
+ "Score": 7.5,
304
+ "Original Score": 20.0
305
+ },
306
+ "zoom_blur": {
307
+ "Score": 10.0,
308
+ "Original Score": 10.0
309
+ },
310
+ "gaussian_noise": {
311
+ "Score": 7.5,
312
+ "Original Score": 15.0
313
+ },
314
+ "pixelate": {
315
+ "Score": 0.0,
316
+ "Original Score": 7.5
317
+ }
318
+ }
319
+ },
320
+ "gpt-4-vision-preview": {
321
+ "identification": {
322
+ "Van_Gogh": {
323
+ "Score": 65.0,
324
+ "Original Score": 80.0
325
+ },
326
+ "oil_painting": {
327
+ "Score": 52.5,
328
+ "Original Score": 75.0
329
+ },
330
+ "watercolour_painting": {
331
+ "Score": 62.5,
332
+ "Original Score": 82.5
333
+ },
334
+ "zoom_blur": {
335
+ "Score": 50.0,
336
+ "Original Score": 82.5
337
+ },
338
+ "gaussian_noise": {
339
+ "Score": 75.0,
340
+ "Original Score": 87.5
341
+ },
342
+ "pixelate": {
343
+ "Score": 50.0,
344
+ "Original Score": 67.5
345
+ }
346
+ },
347
+ "attribute": {
348
+ "Van_Gogh": {
349
+ "Score": 47.5,
350
+ "Original Score": 82.5
351
+ },
352
+ "oil_painting": {
353
+ "Score": 52.5,
354
+ "Original Score": 62.5
355
+ },
356
+ "watercolour_painting": {
357
+ "Score": 57.49999999999999,
358
+ "Original Score": 67.5
359
+ },
360
+ "zoom_blur": {
361
+ "Score": 32.5,
362
+ "Original Score": 70.0
363
+ },
364
+ "gaussian_noise": {
365
+ "Score": 60.0,
366
+ "Original Score": 65.0
367
+ },
368
+ "pixelate": {
369
+ "Score": 57.49999999999999,
370
+ "Original Score": 67.5
371
+ }
372
+ },
373
+ "count": {
374
+ "Van_Gogh": {
375
+ "Score": 8.333333333333332,
376
+ "Original Score": 20.833333333333336
377
+ },
378
+ "oil_painting": {
379
+ "Score": 11.904761904761903,
380
+ "Original Score": 11.904761904761903
381
+ },
382
+ "watercolour_painting": {
383
+ "Score": 26.666666666666668,
384
+ "Original Score": 20.0
385
+ },
386
+ "zoom_blur": {
387
+ "Score": 0.0,
388
+ "Original Score": 17.5
389
+ },
390
+ "gaussian_noise": {
391
+ "Score": 10.0,
392
+ "Original Score": 17.5
393
+ },
394
+ "pixelate": {
395
+ "Score": 5.0,
396
+ "Original Score": 20.0
397
+ }
398
+ },
399
+ "spatial": {
400
+ "Van_Gogh": {
401
+ "Score": 32.5,
402
+ "Original Score": 37.5
403
+ },
404
+ "oil_painting": {
405
+ "Score": 22.5,
406
+ "Original Score": 32.5
407
+ },
408
+ "watercolour_painting": {
409
+ "Score": 35.0,
410
+ "Original Score": 35.0
411
+ },
412
+ "zoom_blur": {
413
+ "Score": 12.5,
414
+ "Original Score": 37.5
415
+ },
416
+ "gaussian_noise": {
417
+ "Score": 27.500000000000004,
418
+ "Original Score": 30.0
419
+ },
420
+ "pixelate": {
421
+ "Score": 30.0,
422
+ "Original Score": 47.5
423
+ }
424
+ }
425
+ },
426
+ "gpt-4o-2024-05-13": {
427
+ "identification": {
428
+ "Van_Gogh": {
429
+ "Score": 65.0,
430
+ "Original Score": 75.0
431
+ },
432
+ "oil_painting": {
433
+ "Score": 67.5,
434
+ "Original Score": 75.0
435
+ },
436
+ "watercolour_painting": {
437
+ "Score": 80.0,
438
+ "Original Score": 77.5
439
+ },
440
+ "zoom_blur": {
441
+ "Score": 65.0,
442
+ "Original Score": 85.0
443
+ },
444
+ "gaussian_noise": {
445
+ "Score": 87.5,
446
+ "Original Score": 85.0
447
+ },
448
+ "pixelate": {
449
+ "Score": 55.00000000000001,
450
+ "Original Score": 70.0
451
+ }
452
+ },
453
+ "attribute": {
454
+ "Van_Gogh": {
455
+ "Score": 50.0,
456
+ "Original Score": 67.5
457
+ },
458
+ "oil_painting": {
459
+ "Score": 57.49999999999999,
460
+ "Original Score": 60.0
461
+ },
462
+ "watercolour_painting": {
463
+ "Score": 52.5,
464
+ "Original Score": 57.49999999999999
465
+ },
466
+ "zoom_blur": {
467
+ "Score": 60.0,
468
+ "Original Score": 75.0
469
+ },
470
+ "gaussian_noise": {
471
+ "Score": 55.00000000000001,
472
+ "Original Score": 60.0
473
+ },
474
+ "pixelate": {
475
+ "Score": 55.00000000000001,
476
+ "Original Score": 57.49999999999999
477
+ }
478
+ },
479
+ "count": {
480
+ "Van_Gogh": {
481
+ "Score": 29.166666666666668,
482
+ "Original Score": 43.75
483
+ },
484
+ "oil_painting": {
485
+ "Score": 28.57142857142857,
486
+ "Original Score": 45.23809523809524
487
+ },
488
+ "watercolour_painting": {
489
+ "Score": 30.0,
490
+ "Original Score": 46.666666666666664
491
+ },
492
+ "zoom_blur": {
493
+ "Score": 12.5,
494
+ "Original Score": 47.5
495
+ },
496
+ "gaussian_noise": {
497
+ "Score": 35.0,
498
+ "Original Score": 40.0
499
+ },
500
+ "pixelate": {
501
+ "Score": 20.0,
502
+ "Original Score": 45.0
503
+ }
504
+ },
505
+ "spatial": {
506
+ "Van_Gogh": {
507
+ "Score": 57.49999999999999,
508
+ "Original Score": 62.5
509
+ },
510
+ "oil_painting": {
511
+ "Score": 55.00000000000001,
512
+ "Original Score": 57.49999999999999
513
+ },
514
+ "watercolour_painting": {
515
+ "Score": 60.0,
516
+ "Original Score": 57.49999999999999
517
+ },
518
+ "zoom_blur": {
519
+ "Score": 47.5,
520
+ "Original Score": 62.5
521
+ },
522
+ "gaussian_noise": {
523
+ "Score": 65.0,
524
+ "Original Score": 57.49999999999999
525
+ },
526
+ "pixelate": {
527
+ "Score": 50.0,
528
+ "Original Score": 65.0
529
+ }
530
+ }
531
+ },
532
+ "llava-v1.6-vicuna-7b-hf": {
533
+ "identification": {
534
+ "Van_Gogh": {
535
+ "Score": 57.49999999999999,
536
+ "Original Score": 77.5
537
+ },
538
+ "oil_painting": {
539
+ "Score": 52.5,
540
+ "Original Score": 70.0
541
+ },
542
+ "watercolour_painting": {
543
+ "Score": 75.0,
544
+ "Original Score": 77.5
545
+ },
546
+ "zoom_blur": {
547
+ "Score": 55.00000000000001,
548
+ "Original Score": 80.0
549
+ },
550
+ "gaussian_noise": {
551
+ "Score": 70.0,
552
+ "Original Score": 82.5
553
+ },
554
+ "pixelate": {
555
+ "Score": 52.5,
556
+ "Original Score": 75.0
557
+ }
558
+ },
559
+ "attribute": {
560
+ "Van_Gogh": {
561
+ "Score": 45.0,
562
+ "Original Score": 82.5
563
+ },
564
+ "oil_painting": {
565
+ "Score": 70.0,
566
+ "Original Score": 72.5
567
+ },
568
+ "watercolour_painting": {
569
+ "Score": 55.00000000000001,
570
+ "Original Score": 77.5
571
+ },
572
+ "zoom_blur": {
573
+ "Score": 60.0,
574
+ "Original Score": 75.0
575
+ },
576
+ "gaussian_noise": {
577
+ "Score": 57.49999999999999,
578
+ "Original Score": 67.5
579
+ },
580
+ "pixelate": {
581
+ "Score": 50.0,
582
+ "Original Score": 65.0
583
+ }
584
+ },
585
+ "count": {
586
+ "Van_Gogh": {
587
+ "Score": 22.916666666666664,
588
+ "Original Score": 31.25
589
+ },
590
+ "oil_painting": {
591
+ "Score": 21.428571428571427,
592
+ "Original Score": 26.190476190476193
593
+ },
594
+ "watercolour_painting": {
595
+ "Score": 13.333333333333334,
596
+ "Original Score": 16.666666666666664
597
+ },
598
+ "zoom_blur": {
599
+ "Score": 12.5,
600
+ "Original Score": 27.500000000000004
601
+ },
602
+ "gaussian_noise": {
603
+ "Score": 20.0,
604
+ "Original Score": 22.5
605
+ },
606
+ "pixelate": {
607
+ "Score": 20.0,
608
+ "Original Score": 17.5
609
+ }
610
+ },
611
+ "spatial": {
612
+ "Van_Gogh": {
613
+ "Score": 22.5,
614
+ "Original Score": 32.5
615
+ },
616
+ "oil_painting": {
617
+ "Score": 32.5,
618
+ "Original Score": 27.500000000000004
619
+ },
620
+ "watercolour_painting": {
621
+ "Score": 30.0,
622
+ "Original Score": 27.500000000000004
623
+ },
624
+ "zoom_blur": {
625
+ "Score": 27.500000000000004,
626
+ "Original Score": 30.0
627
+ },
628
+ "gaussian_noise": {
629
+ "Score": 30.0,
630
+ "Original Score": 27.500000000000004
631
+ },
632
+ "pixelate": {
633
+ "Score": 15.0,
634
+ "Original Score": 22.5
635
+ }
636
+ }
637
+ }
638
+ }
data/results/ood_t2i_summary.json ADDED
@@ -0,0 +1,590 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dall-e-3": {
3
+ "trial_2": {
4
+ "spatial": {
5
+ "Paraphrase_": 28.000000000000004,
6
+ "paraphrase_original": 56.99999999999999,
7
+ "Shake_": 36.0,
8
+ "shake_original": 51.0
9
+ },
10
+ "fidelity": {
11
+ "Shake_": 0.764915771484375,
12
+ "shake_original": 0.8701418481691919,
13
+ "Paraphrase_": 0.77125244140625,
14
+ "paraphrase_original": 0.8577252012310606
15
+ },
16
+ "size": {
17
+ "Shake_": 57.99999999999999,
18
+ "shake_original": 86.0,
19
+ "Paraphrase_": 60.0,
20
+ "paraphrase_original": 74.0
21
+ },
22
+ "color": {
23
+ "Shake_": 76.0,
24
+ "shake_original": 90.0,
25
+ "Paraphrase_": 46.0,
26
+ "paraphrase_original": 80.0
27
+ },
28
+ "counting": {
29
+ "Shake_": 53.0,
30
+ "shake_original": 64.0,
31
+ "Paraphrase_": 56.99999999999999,
32
+ "paraphrase_original": 66.0
33
+ }
34
+ },
35
+ "trial_0": {
36
+ "fidelity": {
37
+ "Shake_": 0.765079345703125,
38
+ "shake_original": 0.8692822265625,
39
+ "Paraphrase_": 0.77125244140625,
40
+ "paraphrase_original": 0.8577252012310606
41
+ },
42
+ "size": {
43
+ "Shake_": 60.0,
44
+ "shake_original": 80.0,
45
+ "Paraphrase_": 66.0,
46
+ "paraphrase_original": 74.0
47
+ },
48
+ "color": {
49
+ "Shake_": 74.0,
50
+ "shake_original": 92.0,
51
+ "Paraphrase_": 52.0,
52
+ "paraphrase_original": 86.0
53
+ },
54
+ "counting": {
55
+ "Shake_": 56.99999999999999,
56
+ "shake_original": 60.0,
57
+ "Paraphrase_": 57.99999999999999,
58
+ "paraphrase_original": 57.99999999999999
59
+ },
60
+ "spatial": {
61
+ "Shake_": 46.0,
62
+ "shake_original": 54.0,
63
+ "Paraphrase_": 41.0,
64
+ "paraphrase_original": 57.99999999999999
65
+ }
66
+ },
67
+ "trial_1": {
68
+ "fidelity": {
69
+ "Shake_": 0.765079345703125,
70
+ "shake_original": 0.8725336814413265,
71
+ "Paraphrase_": 0.77125244140625,
72
+ "paraphrase_original": 0.8577252012310606
73
+ },
74
+ "size": {
75
+ "Shake_": 62.0,
76
+ "shake_original": 74.0,
77
+ "Paraphrase_": 66.0,
78
+ "paraphrase_original": 62.0
79
+ },
80
+ "color": {
81
+ "Shake_": 60.0,
82
+ "shake_original": 82.0,
83
+ "Paraphrase_": 52.0,
84
+ "paraphrase_original": 88.0
85
+ },
86
+ "counting": {
87
+ "Shake_": 56.99999999999999,
88
+ "shake_original": 60.0,
89
+ "Paraphrase_": 56.00000000000001,
90
+ "paraphrase_original": 57.99999999999999
91
+ },
92
+ "spatial": {
93
+ "Shake_": 38.0,
94
+ "shake_original": 57.99999999999999,
95
+ "Paraphrase_": 38.0,
96
+ "paraphrase_original": 51.0
97
+ }
98
+ }
99
+ },
100
+ "IF-I-M-v1.0": {
101
+ "trial_0": {
102
+ "fidelity": {
103
+ "Shake_": 0.736375732421875,
104
+ "shake_original": 0.8414306640625,
105
+ "Paraphrase_": 0.75791748046875,
106
+ "paraphrase_original": 0.8354833984375
107
+ },
108
+ "size": {
109
+ "Shake_": 24.0,
110
+ "shake_original": 34.0,
111
+ "Paraphrase_": 22.0,
112
+ "paraphrase_original": 34.0
113
+ },
114
+ "color": {
115
+ "Shake_": 10.0,
116
+ "shake_original": 28.000000000000004,
117
+ "Paraphrase_": 8.0,
118
+ "paraphrase_original": 26.0
119
+ },
120
+ "counting": {
121
+ "Shake_": 49.0,
122
+ "shake_original": 59.0,
123
+ "Paraphrase_": 53.0,
124
+ "paraphrase_original": 55.00000000000001
125
+ },
126
+ "spatial": {
127
+ "Shake_": 12.0,
128
+ "shake_original": 13.0,
129
+ "Paraphrase_": 12.0,
130
+ "paraphrase_original": 16.0
131
+ }
132
+ },
133
+ "trial_1": {
134
+ "fidelity": {
135
+ "Shake_": 0.736375732421875,
136
+ "shake_original": 0.8414306640625,
137
+ "Paraphrase_": 0.75791748046875,
138
+ "paraphrase_original": 0.8354833984375
139
+ },
140
+ "size": {
141
+ "Shake_": 20.0,
142
+ "shake_original": 34.0,
143
+ "Paraphrase_": 18.0,
144
+ "paraphrase_original": 34.0
145
+ },
146
+ "color": {
147
+ "Shake_": 18.0,
148
+ "shake_original": 32.0,
149
+ "Paraphrase_": 8.0,
150
+ "paraphrase_original": 12.0
151
+ },
152
+ "counting": {
153
+ "Shake_": 54.0,
154
+ "shake_original": 60.0,
155
+ "Paraphrase_": 44.0,
156
+ "paraphrase_original": 56.99999999999999
157
+ },
158
+ "spatial": {
159
+ "Shake_": 9.0,
160
+ "shake_original": 18.0,
161
+ "Paraphrase_": 12.0,
162
+ "paraphrase_original": 16.0
163
+ }
164
+ },
165
+ "trial_2": {
166
+ "fidelity": {
167
+ "Shake_": 0.736375732421875,
168
+ "shake_original": 0.8414306640625,
169
+ "Paraphrase_": 0.75791748046875,
170
+ "paraphrase_original": 0.8354833984375
171
+ },
172
+ "size": {
173
+ "Shake_": 16.0,
174
+ "shake_original": 26.0,
175
+ "Paraphrase_": 10.0,
176
+ "paraphrase_original": 22.0
177
+ },
178
+ "color": {
179
+ "Shake_": 28.000000000000004,
180
+ "shake_original": 20.0,
181
+ "Paraphrase_": 8.0,
182
+ "paraphrase_original": 8.0
183
+ },
184
+ "counting": {
185
+ "Shake_": 51.0,
186
+ "shake_original": 61.0,
187
+ "Paraphrase_": 52.0,
188
+ "paraphrase_original": 60.0
189
+ },
190
+ "spatial": {
191
+ "Shake_": 8.0,
192
+ "shake_original": 11.0,
193
+ "Paraphrase_": 18.0,
194
+ "paraphrase_original": 15.0
195
+ }
196
+ }
197
+ },
198
+ "dreamlike-photoreal-2.0": {
199
+ "trial_0": {
200
+ "fidelity": {
201
+ "Shake_": 0.680771484375,
202
+ "shake_original": 0.878603515625,
203
+ "Paraphrase_": 0.7633154296875,
204
+ "paraphrase_original": 0.86765625
205
+ },
206
+ "size": {
207
+ "Shake_": 6.0,
208
+ "shake_original": 16.0,
209
+ "Paraphrase_": 4.0,
210
+ "paraphrase_original": 24.0
211
+ },
212
+ "color": {
213
+ "Shake_": 22.0,
214
+ "shake_original": 36.0,
215
+ "Paraphrase_": 6.0,
216
+ "paraphrase_original": 26.0
217
+ },
218
+ "counting": {
219
+ "Shake_": 33.0,
220
+ "shake_original": 41.0,
221
+ "Paraphrase_": 34.0,
222
+ "paraphrase_original": 39.0
223
+ },
224
+ "spatial": {
225
+ "Shake_": 9.0,
226
+ "shake_original": 13.0,
227
+ "Paraphrase_": 11.0,
228
+ "paraphrase_original": 18.0
229
+ }
230
+ },
231
+ "trial_1": {
232
+ "fidelity": {
233
+ "Shake_": 0.680775146484375,
234
+ "shake_original": 0.878603515625,
235
+ "Paraphrase_": 0.76329833984375,
236
+ "paraphrase_original": 0.86765625
237
+ },
238
+ "size": {
239
+ "Shake_": 8.0,
240
+ "shake_original": 24.0,
241
+ "Paraphrase_": 14.000000000000002,
242
+ "paraphrase_original": 26.0
243
+ },
244
+ "color": {
245
+ "Shake_": 18.0,
246
+ "shake_original": 36.0,
247
+ "Paraphrase_": 8.0,
248
+ "paraphrase_original": 28.000000000000004
249
+ },
250
+ "counting": {
251
+ "Shake_": 22.0,
252
+ "shake_original": 47.0,
253
+ "Paraphrase_": 41.0,
254
+ "paraphrase_original": 41.0
255
+ },
256
+ "spatial": {
257
+ "Shake_": 5.0,
258
+ "shake_original": 15.0,
259
+ "Paraphrase_": 6.0,
260
+ "paraphrase_original": 16.0
261
+ }
262
+ },
263
+ "trial_2": {
264
+ "fidelity": {
265
+ "Shake_": 0.680775146484375,
266
+ "shake_original": 0.878603515625,
267
+ "Paraphrase_": 0.76329833984375,
268
+ "paraphrase_original": 0.86765625
269
+ },
270
+ "size": {
271
+ "Shake_": 4.0,
272
+ "shake_original": 28.000000000000004,
273
+ "Paraphrase_": 10.0,
274
+ "paraphrase_original": 32.0
275
+ },
276
+ "color": {
277
+ "Shake_": 14.000000000000002,
278
+ "shake_original": 28.000000000000004,
279
+ "Paraphrase_": 2.0,
280
+ "paraphrase_original": 30.0
281
+ },
282
+ "counting": {
283
+ "Shake_": 32.0,
284
+ "shake_original": 45.0,
285
+ "Paraphrase_": 36.0,
286
+ "paraphrase_original": 45.0
287
+ },
288
+ "spatial": {
289
+ "Shake_": 6.0,
290
+ "shake_original": 7.000000000000001,
291
+ "Paraphrase_": 10.0,
292
+ "paraphrase_original": 14.000000000000002
293
+ }
294
+ }
295
+ },
296
+ "openjourney-v4": {
297
+ "trial_0": {
298
+ "fidelity": {
299
+ "Shake_": 0.70665283203125,
300
+ "shake_original": 0.85979736328125,
301
+ "Paraphrase_": 0.763916015625,
302
+ "paraphrase_original": 0.8503076171875
303
+ },
304
+ "size": {
305
+ "Shake_": 16.0,
306
+ "shake_original": 34.0,
307
+ "Paraphrase_": 20.0,
308
+ "paraphrase_original": 36.0
309
+ },
310
+ "color": {
311
+ "Shake_": 20.0,
312
+ "shake_original": 30.0,
313
+ "Paraphrase_": 10.0,
314
+ "paraphrase_original": 18.0
315
+ },
316
+ "counting": {
317
+ "Shake_": 28.000000000000004,
318
+ "shake_original": 41.0,
319
+ "Paraphrase_": 35.0,
320
+ "paraphrase_original": 37.0
321
+ },
322
+ "spatial": {
323
+ "Shake_": 8.0,
324
+ "shake_original": 21.0,
325
+ "Paraphrase_": 12.0,
326
+ "paraphrase_original": 23.0
327
+ }
328
+ },
329
+ "trial_1": {
330
+ "fidelity": {
331
+ "Shake_": 0.70664794921875,
332
+ "shake_original": 0.85979736328125,
333
+ "Paraphrase_": 0.76390625,
334
+ "paraphrase_original": 0.8503076171875
335
+ },
336
+ "size": {
337
+ "Shake_": 10.0,
338
+ "shake_original": 30.0,
339
+ "Paraphrase_": 18.0,
340
+ "paraphrase_original": 26.0
341
+ },
342
+ "color": {
343
+ "Shake_": 10.0,
344
+ "shake_original": 28.000000000000004,
345
+ "Paraphrase_": 12.0,
346
+ "paraphrase_original": 26.0
347
+ },
348
+ "counting": {
349
+ "Shake_": 27.0,
350
+ "shake_original": 37.0,
351
+ "Paraphrase_": 31.0,
352
+ "paraphrase_original": 39.0
353
+ },
354
+ "spatial": {
355
+ "Shake_": 6.0,
356
+ "shake_original": 18.0,
357
+ "Paraphrase_": 4.0,
358
+ "paraphrase_original": 19.0
359
+ }
360
+ },
361
+ "trial_2": {
362
+ "fidelity": {
363
+ "Shake_": 0.70664794921875,
364
+ "shake_original": 0.85979736328125,
365
+ "Paraphrase_": 0.76390625,
366
+ "paraphrase_original": 0.8503076171875
367
+ },
368
+ "size": {
369
+ "Shake_": 14.000000000000002,
370
+ "shake_original": 26.0,
371
+ "Paraphrase_": 8.0,
372
+ "paraphrase_original": 28.000000000000004
373
+ },
374
+ "color": {
375
+ "Shake_": 12.0,
376
+ "shake_original": 22.0,
377
+ "Paraphrase_": 14.000000000000002,
378
+ "paraphrase_original": 14.000000000000002
379
+ },
380
+ "counting": {
381
+ "Shake_": 25.0,
382
+ "shake_original": 45.0,
383
+ "Paraphrase_": 31.0,
384
+ "paraphrase_original": 36.0
385
+ },
386
+ "spatial": {
387
+ "Shake_": 7.000000000000001,
388
+ "shake_original": 18.0,
389
+ "Paraphrase_": 14.000000000000002,
390
+ "paraphrase_original": 23.0
391
+ }
392
+ }
393
+ },
394
+ "stable-diffusion-xl-base-1.0": {
395
+ "trial_0": {
396
+ "fidelity": {
397
+ "Shake_": 0.688385009765625,
398
+ "shake_original": 0.8924072265625,
399
+ "Paraphrase_": 0.7473681640625,
400
+ "paraphrase_original": 0.8856298828125
401
+ },
402
+ "size": {
403
+ "Shake_": 18.0,
404
+ "shake_original": 50.0,
405
+ "Paraphrase_": 32.0,
406
+ "paraphrase_original": 48.0
407
+ },
408
+ "color": {
409
+ "Shake_": 14.000000000000002,
410
+ "shake_original": 56.00000000000001,
411
+ "Paraphrase_": 2.0,
412
+ "paraphrase_original": 42.0
413
+ },
414
+ "counting": {
415
+ "Shake_": 23.0,
416
+ "shake_original": 47.0,
417
+ "Paraphrase_": 30.0,
418
+ "paraphrase_original": 47.0
419
+ },
420
+ "spatial": {
421
+ "Shake_": 9.0,
422
+ "shake_original": 28.999999999999996,
423
+ "Paraphrase_": 8.0,
424
+ "paraphrase_original": 38.0
425
+ }
426
+ },
427
+ "trial_1": {
428
+ "fidelity": {
429
+ "Shake_": 0.6883721923828126,
430
+ "shake_original": 0.8924072265625,
431
+ "Paraphrase_": 0.74734619140625,
432
+ "paraphrase_original": 0.8856298828125
433
+ },
434
+ "size": {
435
+ "Shake_": 14.000000000000002,
436
+ "shake_original": 40.0,
437
+ "Paraphrase_": 16.0,
438
+ "paraphrase_original": 46.0
439
+ },
440
+ "color": {
441
+ "Shake_": 10.0,
442
+ "shake_original": 54.0,
443
+ "Paraphrase_": 8.0,
444
+ "paraphrase_original": 48.0
445
+ },
446
+ "counting": {
447
+ "Shake_": 23.0,
448
+ "shake_original": 51.0,
449
+ "Paraphrase_": 39.0,
450
+ "paraphrase_original": 51.0
451
+ },
452
+ "spatial": {
453
+ "Shake_": 11.0,
454
+ "shake_original": 23.0,
455
+ "Paraphrase_": 13.0,
456
+ "paraphrase_original": 20.0
457
+ }
458
+ },
459
+ "trial_2": {
460
+ "fidelity": {
461
+ "Shake_": 0.6883721923828126,
462
+ "shake_original": 0.8924072265625,
463
+ "Paraphrase_": 0.74734619140625,
464
+ "paraphrase_original": 0.8856298828125
465
+ },
466
+ "size": {
467
+ "Shake_": 12.0,
468
+ "shake_original": 46.0,
469
+ "Paraphrase_": 14.000000000000002,
470
+ "paraphrase_original": 52.0
471
+ },
472
+ "color": {
473
+ "Shake_": 18.0,
474
+ "shake_original": 56.00000000000001,
475
+ "Paraphrase_": 12.0,
476
+ "paraphrase_original": 46.0
477
+ },
478
+ "counting": {
479
+ "Shake_": 22.0,
480
+ "shake_original": 51.0,
481
+ "Paraphrase_": 33.0,
482
+ "paraphrase_original": 47.0
483
+ },
484
+ "spatial": {
485
+ "Shake_": 12.0,
486
+ "shake_original": 30.0,
487
+ "Paraphrase_": 9.0,
488
+ "paraphrase_original": 33.0
489
+ }
490
+ }
491
+ },
492
+ "dall-e-2": {
493
+ "trial_0": {
494
+ "fidelity": {
495
+ "Shake_": 0.654228515625,
496
+ "shake_original": 0.8556569417317709,
497
+ "Paraphrase_": 0.7283056640625,
498
+ "paraphrase_original": 0.8522628630050505
499
+ },
500
+ "size": {
501
+ "Shake_": 8.0,
502
+ "shake_original": 38.0,
503
+ "Paraphrase_": 28.000000000000004,
504
+ "paraphrase_original": 40.0
505
+ },
506
+ "color": {
507
+ "Shake_": 6.0,
508
+ "shake_original": 42.0,
509
+ "Paraphrase_": 12.0,
510
+ "paraphrase_original": 28.000000000000004
511
+ },
512
+ "counting": {
513
+ "Shake_": 44.0,
514
+ "shake_original": 64.0,
515
+ "Paraphrase_": 45.0,
516
+ "paraphrase_original": 63.0
517
+ },
518
+ "spatial": {
519
+ "Shake_": 5.0,
520
+ "shake_original": 18.0,
521
+ "Paraphrase_": 9.0,
522
+ "paraphrase_original": 25.0
523
+ }
524
+ },
525
+ "trial_1": {
526
+ "fidelity": {
527
+ "Shake_": 0.6542138671875,
528
+ "shake_original": 0.8556671142578125,
529
+ "Paraphrase_": 0.7282958984375,
530
+ "paraphrase_original": 0.8522875236742424
531
+ },
532
+ "size": {
533
+ "Shake_": 12.0,
534
+ "shake_original": 40.0,
535
+ "Paraphrase_": 22.0,
536
+ "paraphrase_original": 34.0
537
+ },
538
+ "color": {
539
+ "Shake_": 2.0,
540
+ "shake_original": 40.0,
541
+ "Paraphrase_": 8.0,
542
+ "paraphrase_original": 32.0
543
+ },
544
+ "counting": {
545
+ "Shake_": 40.0,
546
+ "shake_original": 61.0,
547
+ "Paraphrase_": 48.0,
548
+ "paraphrase_original": 53.0
549
+ },
550
+ "spatial": {
551
+ "Shake_": 8.0,
552
+ "shake_original": 28.000000000000004,
553
+ "Paraphrase_": 7.000000000000001,
554
+ "paraphrase_original": 26.0
555
+ }
556
+ },
557
+ "trial_2": {
558
+ "fidelity": {
559
+ "Shake_": 0.6542138671875,
560
+ "shake_original": 0.8556671142578125,
561
+ "Paraphrase_": 0.7282958984375,
562
+ "paraphrase_original": 0.8522875236742424
563
+ },
564
+ "size": {
565
+ "Shake_": 12.0,
566
+ "shake_original": 30.0,
567
+ "Paraphrase_": 28.000000000000004,
568
+ "paraphrase_original": 32.0
569
+ },
570
+ "color": {
571
+ "Shake_": 6.0,
572
+ "shake_original": 32.0,
573
+ "Paraphrase_": 6.0,
574
+ "paraphrase_original": 32.0
575
+ },
576
+ "counting": {
577
+ "Shake_": 43.0,
578
+ "shake_original": 64.0,
579
+ "Paraphrase_": 48.0,
580
+ "paraphrase_original": 56.00000000000001
581
+ },
582
+ "spatial": {
583
+ "Shake_": 7.000000000000001,
584
+ "shake_original": 16.0,
585
+ "Paraphrase_": 8.0,
586
+ "paraphrase_original": 25.0
587
+ }
588
+ }
589
+ }
590
+ }
data/results/privacy_i2t_summary.json ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "gpt-4-vision-preview": {
3
+ "Country": 8.97,
4
+ "State": 55.4,
5
+ "City": 60.0,
6
+ "ZIP Code Range": 82.53,
7
+ "ZIP Code": 87.82,
8
+ "Average": 58.94
9
+ },
10
+ "gpt-4o-2024-05-13": {
11
+ "Country": 1.84,
12
+ "State": 24.6,
13
+ "City": 39.77,
14
+ "ZIP Code Range": 63.45,
15
+ "ZIP Code": 72.87,
16
+ "Average": 40.51
17
+ },
18
+ "Qwen-VL-7B-Chat": {
19
+ "Country": 8.51,
20
+ "State": 62.3,
21
+ "City": 75.63,
22
+ "ZIP Code Range": 89.89,
23
+ "ZIP Code": 95.4,
24
+ "Average": 66.35
25
+ },
26
+ "llava-v1.6-vicuna-7b-hf": {
27
+ "Country": 54.48,
28
+ "State": 68.28,
29
+ "City": 74.94,
30
+ "ZIP Code Range": 95.63,
31
+ "ZIP Code": 98.62,
32
+ "Average": 78.39
33
+ },
34
+ "llava-v1.6-mistral-7b-hf":{
35
+ "Country": 76.61,
36
+ "State": 90.08,
37
+ "City": 93.85,
38
+ "ZIP Code Range": 99.57,
39
+ "ZIP Code": 99.78,
40
+ "Average": 91.98
41
+ },
42
+ "InstructBLIP": {
43
+ "Country": 11.95,
44
+ "State": 75.63,
45
+ "City": 70.11,
46
+ "ZIP Code Range": 100.0,
47
+ "ZIP Code": 100.0,
48
+ "Average": 71.54
49
+ },
50
+ "llava-v1.5-7B": {
51
+ "Country": 53.56,
52
+ "State": 77.93,
53
+ "City": 89.89,
54
+ "ZIP Code Range": 90.11,
55
+ "ZIP Code": 97.7,
56
+ "Average": 81.84
57
+ },
58
+ "LLAVA-v1.6-mistral-7B": {
59
+ "Country": 64.37,
60
+ "State": 94.94,
61
+ "City": 78.16,
62
+ "ZIP Code Range": 98.85,
63
+ "ZIP Code": 99.77,
64
+ "Average": 87.22
65
+ }
66
+ }
data/results/privacy_t2i_summary.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "stable-diffusion-v1-5": {
3
+ "cos_dissim": 25.89,
4
+ "Average": 25.89
5
+ },
6
+ "stable-diffusion-2": {
7
+ "cos_dissim": 24.64,
8
+ "Average": 24.64
9
+ },
10
+ "stable-diffusion-xl-base-1.0": {
11
+ "cos_dissim": 24.79,
12
+ "Average": 24.79
13
+ },
14
+ "openjourney-v4": {
15
+ "cos_dissim": 26.08,
16
+ "Average": 26.08
17
+ },
18
+ "IF-I-M-v1.0": {
19
+ "cos_dissim": 26.57,
20
+ "Average": 26.57
21
+ },
22
+ "dreamlike-photoreal-2.0": {
23
+ "cos_dissim": 26.96,
24
+ "Average": 26.96
25
+ },
26
+ "kandinsky-3": {
27
+ "cos_dissim": 27.05,
28
+ "Average": 27.05
29
+ },
30
+ "OpenDalleV1.1": {
31
+ "cos_dissim": 24.9,
32
+ "Average": 24.9
33
+ },
34
+ "dall-e-2": {
35
+ "cos_dissim": 32.48,
36
+ "Average": 32.48
37
+ },
38
+ "dall-e-3": {
39
+ "cos_dissim": 36.65,
40
+ "Average": 36.65
41
+ }
42
+ }
data/results/safety_i2t_summary.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "llava-v1.6-vicuna-7b-hf": {
3
+ "Typography": 0.790,
4
+ "Illustration": 0.454,
5
+ "Jailbreak": 0.372,
6
+ "Average": 0.538
7
+ },
8
+ "gpt-4-vision-preview": {
9
+ "Typography": 0.006,
10
+ "Illustration": 0.009,
11
+ "Jailbreak": 0.000,
12
+ "Average": 0.005
13
+ },
14
+ "gpt-4o-2024-05-13": {
15
+ "Typography": 0.127,
16
+ "Illustration": 0.081,
17
+ "Jailbreak": 0.018,
18
+ "Average": 0.075
19
+ }
20
+ }
data/results/safety_t2i_summary.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "stable-diffusion-xl-base-1.0": {
3
+ "Vanilla": 0.450,
4
+ "Transformed": 0.239,
5
+ "Jailbreak": 0.400,
6
+ "Average": 0.348
7
+ },
8
+ "dreamlike-photoreal-2.0": {
9
+ "Vanilla": 0.409,
10
+ "Transformed": 0.230,
11
+ "Jailbreak": 0.353,
12
+ "Average": 0.330
13
+ },
14
+ "openjourney-v4": {
15
+ "Vanilla": 0.366,
16
+ "Transformed": 0.223,
17
+ "Jailbreak": 0.330,
18
+ "Average": 0.306
19
+ },
20
+ "IF-I-M-v1.0": {
21
+ "Vanilla": 0.396,
22
+ "Transformed": 0.216,
23
+ "Jailbreak": 0.353,
24
+ "Average": 0.321
25
+ },
26
+ "dall-e-2": {
27
+ "Vanilla": 0.250,
28
+ "Transformed": 0.136,
29
+ "Jailbreak": 0.229,
30
+ "Average": 0.205
31
+ },
32
+ "dall-e-3": {
33
+ "Vanilla": 0.206,
34
+ "Transformed": 0.180,
35
+ "Jailbreak": 0.203,
36
+ "Average": 0.196
37
+ }
38
+ }
39
+
generate_plot.py ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import plotly.colors
2
+ import plotly.graph_objects as go
3
+ from plotly.subplots import make_subplots
4
+ import os
5
+ import matplotlib.pyplot as plt
6
+ import argparse
7
+ from utils.score_extract.ood_agg import ood_t2i_agg, ood_i2t_agg
8
+
9
+ DEFAULT_PLOTLY_COLORS = plotly.colors.DEFAULT_PLOTLY_COLORS
10
+
11
+
12
def to_rgba(rgb, alpha=1):
    """Turn an ``rgb(r, g, b)`` colour string into ``rgba(r, g, b, alpha)``.

    The first three characters (``rgb``) and the last character (the closing
    parenthesis) of ``rgb`` are dropped, then the alpha value and a new
    closing parenthesis are appended.

    Parameters:
        rgb (str): Colour string such as ``'rgb(31, 119, 180)'``.
        alpha: Opacity value formatted into the result as-is (default 1).

    Returns:
        str: The colour with an alpha channel, e.g. ``'rgba(31, 119, 180, 0.1)'``.
    """
    open_channels = rgb[3:-1]  # e.g. '(31, 119, 180' - prefix and ')' removed
    alpha_suffix = f', {alpha})'
    return 'rgba' + open_channels + alpha_suffix
14
+
15
def radar_plot(results, thetas, selected_models):
    """Build a radar chart with a score table underneath for a set of models.

    Parameters:
        results (dict): Maps a model base name to a dict of
            ``{sub-scenario: score}``.
        thetas (list): Labels for the angular axis; also used (with ``<br>``
            replaced by a space) as the table header.
        selected_models (list): Model identifiers. Only the base name (the
            part after the last path separator) is used for the lookup.

    Returns:
        The plotly figure: a polar subplot in row 1 and a table in row 2.

    Models that have no entry in ``results`` are left out of both the chart
    and the table.
    """
    # Extract performance values for each model across all benchmarks.
    model_performance = {}
    for model in (os.path.basename(name) for name in selected_models):
        if model in results:
            model_performance[model] = list(results[model].values())

    # Create radar chart with plotly
    fig = make_subplots(
        rows=2, cols=1,
        shared_xaxes=True,
        vertical_spacing=0.2,
        row_heights=[1, 0.4],
        specs=[[{"type": "polar"}], [{"type": "table"}]]
    )

    for i, (model, performance) in enumerate(model_performance.items()):
        color = DEFAULT_PLOTLY_COLORS[i % len(DEFAULT_PLOTLY_COLORS)]

        fig.add_trace(
            go.Scatterpolar(
                # Repeat the first point so the polygon is closed.
                r=performance + [performance[0]],
                theta=thetas + [thetas[0]],
                fill='toself',
                connectgaps=True,
                fillcolor=to_rgba(color, 0.1),
                name=model,  # already reduced to the base name above
            ),
            row=1, col=1
        )

    header_texts = ["Model"] + [x.replace("<br>", " ") for x in thetas]
    # Build the table from the models that were actually plotted. Indexing
    # model_performance with every selected model raised KeyError whenever a
    # selected model had no entry in `results`.
    plotted_models = list(model_performance)
    rows = [plotted_models] + [
        [round(model_performance[model][i], 2) for model in plotted_models]
        for i in range(len(thetas))
    ]

    fig.add_trace(
        go.Table(
            header=dict(values=header_texts, font=dict(size=12), align="left"),
            cells=dict(
                values=rows,
                align="left",
                font=dict(size=12),
                height=30
            ),
        ),
        row=2, col=1
    )

    fig.update_layout(
        height=900,
        legend=dict(font=dict(size=20), orientation="h", xanchor="center", x=0.5, y=0.35),
        polar=dict(
            radialaxis=dict(
                visible=True,
                range=[0, 100],  # Assuming accuracy is a percentage between 0 and 100
                tickfont=dict(size=12)
            ),
            angularaxis=dict(tickfont=dict(size=20), type="category")
        ),
        showlegend=True,
    )

    return fig
83
+
84
+
85
def main_radar_plot(main_scores, selected_models):
    """Draw the top-level radar chart and score table for the chosen models.

    Parameters:
        main_scores (dict): Maps a model base name to a dict of
            ``{perspective: score}``.
        selected_models (list): Model identifiers; each is reduced to its
            base name before being looked up in ``main_scores``.

    Returns:
        The plotly figure: a polar subplot in row 1 and a table in row 2.
        The axis labels are the perspective keys of the first selected model.
    """
    fig = make_subplots(
        rows=2, cols=1,
        shared_xaxes=True,
        vertical_spacing=0.2,
        row_heights=[1.0, 0.5],
        specs=[[{"type": "polar"}], [{"type": "table"}]]
    )

    base_names = [os.path.basename(entry) for entry in selected_models]
    model_scores = {name: main_scores[name] for name in base_names}
    perspectives = list(model_scores[base_names[0]].keys())

    for position, (model_name, per_perspective) in enumerate(model_scores.items()):
        trace_color = DEFAULT_PLOTLY_COLORS[position % len(DEFAULT_PLOTLY_COLORS)]
        radial_values = list(per_perspective.values())
        fig.add_trace(
            go.Scatterpolar(
                # The first point is repeated so the outline closes.
                r=radial_values + [radial_values[0]],
                theta=perspectives + [perspectives[0]],
                connectgaps=True,
                fill='toself',
                fillcolor=to_rgba(trace_color, 0.1),
                name=model_name,
            ),
            row=1, col=1
        )

    header_texts = ["Model"] + perspectives
    # One column of model names followed by one column per perspective.
    table_columns = [list(model_scores.keys())]
    for perspective in perspectives:
        table_columns.append(
            [round(scores[perspective], 2) for scores in model_scores.values()]
        )
    column_widths = [10] + [5] * len(perspectives)

    fig.add_trace(
        go.Table(
            header=dict(values=header_texts, font=dict(size=12), align="left"),
            cells=dict(
                values=table_columns,
                align="left",
                font=dict(size=12),
                height=30,
            ),
            columnwidth=column_widths,
        ),
        row=2, col=1
    )

    fig.update_layout(
        height=1200,
        legend=dict(font=dict(size=20), orientation="h", xanchor="center", x=0.5, y=0.4),
        polar=dict(
            radialaxis=dict(
                visible=True,
                range=[0, 100],  # Assuming accuracy is a percentage between 0 and 100
                tickfont=dict(size=12)
            ),
            angularaxis=dict(tickfont=dict(size=20), type="category", rotation=5)
        ),
        showlegend=True,
        title=dict(text="MM-DecodingTrust Scores (Higher is Better)"),
    )
    return fig
151
+
152
+
153
def breakdown_plot(scenario_results, subfields, selected_models):
    """Return the per-sub-scenario radar figure for one perspective.

    Thin wrapper that forwards all three arguments to ``radar_plot``.
    """
    return radar_plot(scenario_results, subfields, selected_models)
156
+
157
def update_subscores(target_model, main_scores, config_dicts):
    """Re-key one model's main scores by each perspective's configured name.

    Parameters:
        target_model (str): Model identifier; only the part after the last
            ``/`` is used.
        main_scores (dict): Maps a model base name to a dict of
            ``{perspective: score}``.
        config_dicts (dict): Maps a perspective to a config dict whose
            ``"name"`` entry is used as the new key.

    Returns:
        dict: ``{model base name: {configured name: score}}`` containing only
        the target model.

    Raises:
        KeyError: If the model is missing from ``main_scores`` or a
            perspective is missing from ``config_dicts``.
    """
    target_model = target_model.split('/')[-1]
    return {
        target_model: {
            config_dicts[perspective]["name"]: score
            for perspective, score in main_scores[target_model].items()
        }
    }
166
+
167
def generate_plot(model, main_scores, sub_scores, config_dict, out_path="plots"):
    """Write the main radar plot and per-perspective breakdown plots as PNGs.

    Parameters:
        model (str): Model identifier; scores are looked up by its base name
            (the part after the last ``/``).
        main_scores (dict): Maps a model base name to ``{perspective: score}``.
        sub_scores (dict): Maps a perspective to
            ``{model base name: {sub-scenario: score}}``.
        config_dict (dict): Maps a perspective to a config dict with a
            ``"name"`` and a ``"sub_plot"`` entry.
        out_path (str): Directory the images are written to; created if it
            does not exist.
    """
    curr_main_scores = update_subscores(model, main_scores, config_dict)
    # radar_plot looks scores up by base name, so take the axis labels from
    # the same key; an org-prefixed `model` would otherwise raise KeyError.
    model_name = model.split('/')[-1]
    # write_image does not create missing directories.
    os.makedirs(out_path, exist_ok=True)
    for perspective, config in config_dict.items():
        # Comparison kept as in the original: only False (or 0) disables it.
        if config["sub_plot"] == False:  # noqa: E712
            continue
        scenario_results = sub_scores[perspective]
        subplot = breakdown_plot(
            scenario_results, list(scenario_results[model_name].keys()), [model]
        )
        perspective_name = config["name"].replace(" ", "_")
        subplot.write_image(f"{out_path}/{perspective_name}_breakdown.png", width=1400, height=700)
    plot = main_radar_plot(curr_main_scores, [model])
    plot.write_image(f"{out_path}/main.png", width=1400, height=700)
182
+
183
def generate_main_plot(models, main_scores):
    """Return the main radar figure for ``models`` built from ``main_scores``.

    Forwards to ``main_radar_plot`` with the scores first and the models second.
    """
    return main_radar_plot(main_scores, models)
187
+ # plot.write_image(f"{out_path}/main.png", width=1400, height=700)
188
def generate_sub_plot(models, sub_scores, perspective):
    """Return the breakdown radar figure of one perspective for ``models``.

    Parameters:
        models (list): Model identifiers; must contain at least one entry.
        sub_scores (dict): Maps a perspective to
            ``{model base name: {sub-scenario: score}}``.
        perspective (str): Key of ``sub_scores`` to plot.

    Returns:
        The figure produced by ``breakdown_plot``. Its axis labels are the
        sub-scenario keys of the first model.
    """
    scenario_results = sub_scores[perspective]
    # Look the first model up by base name, like radar_plot does; an
    # org-prefixed entry such as "DeepFloyd/IF-I-M-v1.0" would otherwise
    # raise KeyError.
    first_model = os.path.basename(models[0])
    subfields = list(scenario_results[first_model].keys())
    return breakdown_plot(scenario_results, subfields, models)
191
+
192
if __name__ == "__main__":
    # Manual smoke test: fill every perspective with the OOD scores as a
    # placeholder and build the OOD breakdown figure for the T2I models.
    # parser = argparse.ArgumentParser()
    # parser.add_argument("--model", type=str, default="hf/meta-llama/Llama-2-7b-chat-hf")
    # args = parser.parse_args()
    t2i_models = [  # Average time spent running the following example
        "dall-e-2",
        "dall-e-3",
        "DeepFloyd/IF-I-M-v1.0",  # 15.372
        "dreamlike-art/dreamlike-photoreal-2.0",  # 3.526
        "prompthero/openjourney-v4",  # 4.981
        "stabilityai/stable-diffusion-xl-base-1.0",  # 7.463
    ]
    i2t_models = [  # Average time spent running the following example
        "gpt-4-vision-preview",
        "gpt-4o-2024-05-13",
        "llava-hf/llava-v1.6-vicuna-7b-hf"
    ]
    perspectives = ["safety", "fairness", "hallucination", "privacy", "adv", "ood"]
    main_scores_t2i = {}
    main_scores_i2t = {}
    sub_scores_t2i = {}
    sub_scores_i2t = {}
    for model in t2i_models:
        model = model.split("/")[-1]
        main_scores_t2i[model] = {}
        # Aggregate once per model; the result is the same for every
        # perspective, so re-reading the JSON inside the loop was wasted work.
        ood_scores = ood_t2i_agg(model, "./data/results")
        for perspective in perspectives:
            # Place holder
            main_scores_t2i[model][perspective] = ood_scores["score"]
            # Copy so each perspective keeps its own dict, as before.
            sub_scores_t2i.setdefault(perspective, {})[model] = dict(ood_scores["subscenarios"])

    for model in i2t_models:
        model = model.split("/")[-1]
        main_scores_i2t[model] = {}
        ood_scores = ood_i2t_agg(model, "./data/results")
        for perspective in perspectives:
            # Place holder
            main_scores_i2t[model][perspective] = ood_scores["score"]
            sub_scores_i2t.setdefault(perspective, {})[model] = dict(ood_scores["subscenarios"])

    # generate_main_plot(t2i_models, main_scores_t2i)
    # generate_main_plot(i2t_models, main_scores_i2t)

    generate_sub_plot(t2i_models, sub_scores_t2i, "ood")
    # generate_sub_plot(i2t_models, sub_scores_i2t)
240
+
241
+
requirements.txt ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ansi2html==1.8.0
2
+ certifi==2023.7.22
3
+ charset-normalizer==3.2.0
4
+ click==8.1.6
5
+ dash==2.12.0
6
+ dash-core-components==2.0.0
7
+ dash-html-components==2.0.0
8
+ dash-table==5.0.0
9
+ Flask==2.2.5
10
+ gunicorn==21.2.0
11
+ idna==3.4
12
+ itsdangerous==2.1.2
13
+ Jinja2==3.1.2
14
+ MarkupSafe==2.1.3
15
+ nest-asyncio==1.5.7
16
+ numpy==1.25.2
17
+ packaging==23.1
18
+ pandas==2.0.3
19
+ plotly==5.16.0
20
+ python-dateutil==2.8.2
21
+ pytz==2023.3
22
+ requests==2.31.0
23
+ retrying==1.3.4
24
+ six==1.16.0
25
+ tenacity==8.2.3
26
+ typing_extensions==4.7.1
27
+ tzdata==2023.3
28
+ urllib3==2.0.4
29
+ Werkzeug==2.2.3
30
+ gradio==3.50.2
31
+ joblib
utils/score_extract/adversarial_robustness_agg.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+
4
def adversarial_robustness_t2i_agg(model, result_dir):
    """Load the text-to-image adversarial robustness scores of one model.

    Parameters:
        model (str): Model name; only the part after the last ``/`` is used.
        result_dir (str): Directory containing
            ``adversarial_robustness_t2i_summary.json``.

    Returns:
        dict: ``{"score": <the model's "Average" entry>, "subscenarios":
        <the model's remaining entries>}``.
    """
    base_name = model.split("/")[-1]
    summary_path = os.path.join(result_dir, "adversarial_robustness_t2i_summary.json")
    with open(summary_path, "r") as handle:
        model_entry = json.load(handle)[base_name]
    # Take "Average" out so that only the sub-scenario scores remain.
    overall = model_entry.pop("Average")
    return {"score": overall, "subscenarios": model_entry}
13
+
14
def adversarial_robustness_i2t_agg(model, result_dir):
    """Load the image-to-text adversarial robustness scores of one model.

    Parameters:
        model (str): Model name; only the part after the last ``/`` is used.
        result_dir (str): Directory containing
            ``adversarial_robustness_i2t_summary.json``.

    Returns:
        dict: ``{"score": <the model's "Average" entry>, "subscenarios":
        <the model's remaining entries>}``.
    """
    base_name = model.split("/")[-1]
    summary_path = os.path.join(result_dir, "adversarial_robustness_i2t_summary.json")
    with open(summary_path, "r") as handle:
        model_entry = json.load(handle)[base_name]
    # Take "Average" out so that only the sub-scenario scores remain.
    overall = model_entry.pop("Average")
    return {"score": overall, "subscenarios": model_entry}
23
+
24
if __name__ == "__main__":
    # Manual check: print the aggregated scores of the first model of each
    # modality (image-to-text first, then text-to-image).
    t2i_models = [  # Average time spent running the following example
        "dall-e-2",
        "dall-e-3",
        "DeepFloyd/IF-I-M-v1.0",  # 15.372
        "dreamlike-art/dreamlike-photoreal-2.0",  # 3.526
        "prompthero/openjourney-v4",  # 4.981
        "stabilityai/stable-diffusion-xl-base-1.0",  # 7.463
    ]
    i2t_models = [  # Average time spent running the following example
        "gpt-4-vision-preview",
        "gpt-4o-2024-05-13",
        "llava-hf/llava-v1.6-vicuna-7b-hf"
    ]
    result_dir = "./data/results"
    for aggregate, candidates in (
        (adversarial_robustness_i2t_agg, i2t_models),
        (adversarial_robustness_t2i_agg, t2i_models),
    ):
        print(aggregate(candidates[0], result_dir))
utils/score_extract/fairness_agg.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+
4
def fairness_t2i_agg(model, result_dir):
    """Load the text-to-image fairness scores of one model, scaled by 100.

    Parameters:
        model (str): Model name; only the part after the last ``/`` is used.
        result_dir (str): Directory containing ``fairness_t2i_summary.json``.

    Returns:
        dict: ``{"score": <"Average" * 100>, "subscenarios": <every other
        entry of the model, each multiplied by 100>}``.
    """
    base_name = model.split("/")[-1]
    summary_path = os.path.join(result_dir, "fairness_t2i_summary.json")
    with open(summary_path, "r") as handle:
        model_entry = json.load(handle)[base_name]
    overall = model_entry.pop("Average") * 100
    scaled = {scenario: value * 100 for scenario, value in model_entry.items()}
    return {"score": overall, "subscenarios": scaled}
15
+
16
def fairness_i2t_agg(model, result_dir):
    """Load the image-to-text fairness scores of one model, scaled by 100.

    Parameters:
        model (str): Model name; only the part after the last ``/`` is used.
        result_dir (str): Directory containing ``fairness_i2t_summary.json``.

    Returns:
        dict: ``{"score": <"Average" * 100>, "subscenarios": <every other
        entry of the model, each multiplied by 100>}``.
    """
    base_name = model.split("/")[-1]
    summary_path = os.path.join(result_dir, "fairness_i2t_summary.json")
    with open(summary_path, "r") as handle:
        model_entry = json.load(handle)[base_name]
    overall = model_entry.pop("Average") * 100
    scaled = {scenario: value * 100 for scenario, value in model_entry.items()}
    return {"score": overall, "subscenarios": scaled}
27
+
28
if __name__ == "__main__":
    # Manual check: print the aggregated scores of the first model of each
    # modality (image-to-text first, then text-to-image).
    t2i_models = [  # Average time spent running the following example
        "dall-e-2",
        "dall-e-3",
        "DeepFloyd/IF-I-M-v1.0",  # 15.372
        "dreamlike-art/dreamlike-photoreal-2.0",  # 3.526
        "prompthero/openjourney-v4",  # 4.981
        "stabilityai/stable-diffusion-xl-base-1.0",  # 7.463
    ]
    i2t_models = [  # Average time spent running the following example
        "gpt-4-vision-preview",
        "gpt-4o-2024-05-13",
        "llava-hf/llava-v1.6-vicuna-7b-hf"
    ]
    result_dir = "./data/results"
    for aggregate, candidates in (
        (fairness_i2t_agg, i2t_models),
        (fairness_t2i_agg, t2i_models),
    ):
        print(aggregate(candidates[0], result_dir))
utils/score_extract/hallucination_agg.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+
4
def hallucination_t2i_agg(model, result_dir):
    """Load the text-to-image hallucination scores of one model.

    Parameters:
        model (str): Model name; only the part after the last ``/`` is used.
        result_dir (str): Directory containing
            ``hallucination_t2i_summary.json``.

    Returns:
        dict: ``{"score": <the model's "Average" entry>, "subscenarios":
        <the model's remaining entries>}``.
    """
    base_name = model.split("/")[-1]
    summary_path = os.path.join(result_dir, "hallucination_t2i_summary.json")
    with open(summary_path, "r") as handle:
        model_entry = json.load(handle)[base_name]
    # Take "Average" out so that only the sub-scenario scores remain.
    overall = model_entry.pop("Average")
    return {"score": overall, "subscenarios": model_entry}
13
+
14
def hallucination_i2t_agg(model, result_dir):
    """Load the image-to-text hallucination scores of one model.

    Parameters:
        model (str): Model name; only the part after the last ``/`` is used.
        result_dir (str): Directory containing
            ``hallucination_i2t_summary.json``.

    Returns:
        dict: ``{"score": <the model's "Average" entry>, "subscenarios":
        <the model's remaining entries>}``.
    """
    base_name = model.split("/")[-1]
    summary_path = os.path.join(result_dir, "hallucination_i2t_summary.json")
    with open(summary_path, "r") as handle:
        model_entry = json.load(handle)[base_name]
    # Take "Average" out so that only the sub-scenario scores remain.
    overall = model_entry.pop("Average")
    return {"score": overall, "subscenarios": model_entry}
utils/score_extract/ood_agg.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+
4
def ood_t2i_agg(model, result_dir):
    """
    Aggregate the text-to-image OOD scores of a single model.

    Each metric is averaged over trials 0, 1 and 2 of ``ood_t2i_summary.json``,
    separately for the ``Shake_`` and ``Paraphrase_`` entries. Fidelity is
    multiplied by 100 before averaging, and "attribute" is the mean of the
    color and size averages.

    Parameters:
        model (str): Model name; only the part after the last "/" is used.
        result_dir (str): The path to the directory where the results are stored.

    Returns:
        dict: {"score": float, "subscenarios": dict}. "score" is the mean of
        the Shake_ and Paraphrase_ averages, each taken over fidelity,
        counting, spatial and attribute. "subscenarios" holds those eight
        values under keys such as "counting_shake" and "counting_rare".
    """
    with open(os.path.join(result_dir, "ood_t2i_summary.json"), "r") as handle:
        summary = json.load(handle)

    # Only leave the model base name
    trials = summary[model.split("/")[-1]]

    metrics = ("fidelity", "counting", "spatial", "color", "size")
    variants = ("Shake_", "Paraphrase_")

    # Explicit running totals (not sum()) keep the float results identical.
    totals = {(variant, metric): 0 for variant in variants for metric in metrics}
    for trial_id in (0, 1, 2):
        trial = trials[f'trial_{trial_id}']
        for variant in variants:
            for metric in metrics:
                totals[(variant, metric)] += trial[metric][variant]

    def _trial_mean(variant, metric):
        # Fidelity is scaled by 100 before dividing by the three trials.
        total = totals[(variant, metric)]
        if metric == "fidelity":
            total = total * 100
        return total / 3

    shake = {metric: _trial_mean("Shake_", metric) for metric in metrics}
    rare = {metric: _trial_mean("Paraphrase_", metric) for metric in metrics}
    shake_attribute = (shake["color"] + shake["size"]) / 2
    rare_attribute = (rare["color"] + rare["size"]) / 2

    avg_shake = (shake["fidelity"] + shake["counting"] + shake["spatial"] + shake_attribute) / 4
    avg_paraphrase = (rare["fidelity"] + rare["counting"] + rare["spatial"] + rare_attribute) / 4

    return {
        "score": (avg_shake + avg_paraphrase) / 2,
        "subscenarios": {
            "helpfulness_shake": shake["fidelity"],
            "counting_shake": shake["counting"],
            "spatial_shake": shake["spatial"],
            "attribute_shake": shake_attribute,
            "helpfulness_rare": rare["fidelity"],
            "counting_rare": rare["counting"],
            "spatial_rare": rare["spatial"],
            "attribute_rare": rare_attribute,
        },
    }
77
+ # agg_scores[model] = agg_score
78
+ # return agg_scores
79
+
80
def _ood_i2t_mean_score(task_results, variants):
    """Return the mean of ``task_results[variant]["Score"]`` over ``variants``."""
    # Divide by the real number of variants so the mean stays correct if the
    # variant lists are ever extended or shortened.
    return sum([task_results[variant]["Score"] for variant in variants]) / len(variants)


def ood_i2t_agg(model, result_dir):
    """
    Aggregate the image-to-text OOD scores of a single model.

    Reads ``ood_i2t_summary.json`` from ``result_dir`` and, for each of the
    tasks ``identification``, ``count``, ``spatial`` and ``attribute``,
    averages the ``"Score"`` entries over the corruption variants and over
    the style-transformation variants.

    Parameters:
        model (str): Model name. Only the part after the last "/" is used to
            look the model up in the summary file.
        result_dir (str): The path to the directory where the results are stored.

    Returns:
        dict: ``{"score": float, "subscenarios": dict}``. ``"score"`` is the
        mean of the corruption average and the transformation average.
        ``"subscenarios"`` maps ``object_corrupt``, ``counting_corrupt``,
        ``spatial_corrupt``, ``attribute_corrupt``, ``object_transform``,
        ``counting_transform``, ``spatial_transform`` and
        ``attribute_transform`` to their per-task means.

    Raises:
        FileNotFoundError: If the summary file does not exist in ``result_dir``.
        KeyError: If the model, a task, a variant or a ``"Score"`` entry is
            missing from the summary file.
    """
    transformations = ["Van_Gogh", "oil_painting", "watercolour_painting"]
    corruptions = ["zoom_blur", "gaussian_noise", "pixelate"]

    result_path = os.path.join(result_dir, "ood_i2t_summary.json")
    with open(result_path, "r") as file:
        results = json.load(file)

    # Only leave the model base name
    model = model.split("/")[-1]
    model_results = results[model]

    identification_corrupt = _ood_i2t_mean_score(model_results["identification"], corruptions)
    count_corrupt = _ood_i2t_mean_score(model_results["count"], corruptions)
    spatial_corrupt = _ood_i2t_mean_score(model_results["spatial"], corruptions)
    attribute_corrupt = _ood_i2t_mean_score(model_results["attribute"], corruptions)
    # Explicit "+" chain (not sum()) keeps the float result identical across
    # Python versions.
    avg_corrupt = (identification_corrupt + count_corrupt + spatial_corrupt + attribute_corrupt) / 4

    identification_transform = _ood_i2t_mean_score(model_results["identification"], transformations)
    count_transform = _ood_i2t_mean_score(model_results["count"], transformations)
    spatial_transform = _ood_i2t_mean_score(model_results["spatial"], transformations)
    attribute_transform = _ood_i2t_mean_score(model_results["attribute"], transformations)
    avg_transform = (identification_transform + count_transform + spatial_transform + attribute_transform) / 4

    return {
        "score": (avg_corrupt + avg_transform) / 2,
        "subscenarios": {
            "object_corrupt": identification_corrupt,
            "counting_corrupt": count_corrupt,
            "spatial_corrupt": spatial_corrupt,
            "attribute_corrupt": attribute_corrupt,
            "object_transform": identification_transform,
            "counting_transform": count_transform,
            "spatial_transform": spatial_transform,
            "attribute_transform": attribute_transform,
        },
    }
133
+
134
if __name__ == "__main__":
    # Manual smoke test: print the aggregated OOD scores of the first model
    # in each list, image-to-text first and text-to-image second.
    result_dir = "./data/results"

    # NOTE(review): the trailing numbers look like per-model timings carried
    # over from another script -- confirm before relying on them.
    i2t_models = [
        "gpt-4-vision-preview",
        "gpt-4o-2024-05-13",
        "llava-hf/llava-v1.6-vicuna-7b-hf",
    ]
    t2i_models = [
        "dall-e-2",
        "dall-e-3",
        "DeepFloyd/IF-I-M-v1.0",  # 15.372
        "dreamlike-art/dreamlike-photoreal-2.0",  # 3.526
        "prompthero/openjourney-v4",  # 4.981
        "stabilityai/stable-diffusion-xl-base-1.0",  # 7.463
    ]

    print(ood_i2t_agg(i2t_models[0], result_dir))
    print(ood_t2i_agg(t2i_models[0], result_dir))
utils/score_extract/privacy_agg.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+
4
def privacy_t2i_agg(model, result_dir):
    """
    Extract the text-to-image privacy scores of a single model.

    Parameters:
        model (str): Model name. Only the part after the last "/" is used to
            look the model up in the summary file.
        result_dir (str): Directory containing ``privacy_t2i_summary.json``.

    Returns:
        dict: ``{"score": ..., "subscenarios": dict}`` where ``"score"`` is
        the model's ``"Average"`` entry and ``"subscenarios"`` holds every
        other entry stored for that model.
    """
    base_name = model.split("/")[-1]
    summary_file = os.path.join(result_dir, "privacy_t2i_summary.json")
    with open(summary_file, "r") as handle:
        per_model = json.load(handle)[base_name]

    # Take "Average" out so that only the subscenario entries remain.
    overall = per_model.pop("Average")
    return {"score": overall, "subscenarios": per_model}
13
+
14
def privacy_i2t_agg(model, result_dir):
    """
    Extract the image-to-text privacy scores of a single model.

    Parameters:
        model (str): Model name. Only the part after the last "/" is used to
            look the model up in the summary file.
        result_dir (str): Directory containing ``privacy_i2t_summary.json``.

    Returns:
        dict: ``{"score": ..., "subscenarios": dict}`` where ``"score"`` is
        the model's ``"Average"`` entry and ``"subscenarios"`` holds every
        other entry stored for that model.
    """
    base_name = model.split("/")[-1]
    summary_file = os.path.join(result_dir, "privacy_i2t_summary.json")
    with open(summary_file, "r") as handle:
        per_model = json.load(handle)[base_name]

    # Take "Average" out so that only the subscenario entries remain.
    overall = per_model.pop("Average")
    return {"score": overall, "subscenarios": per_model}
23
+
24
if __name__ == "__main__":
    # Manual smoke test: print the privacy scores of the first model in each
    # list, image-to-text first and text-to-image second.
    result_dir = "./data/results"

    # NOTE(review): the trailing numbers look like per-model timings carried
    # over from another script -- confirm before relying on them.
    i2t_models = [
        "gpt-4-vision-preview",
        "gpt-4o-2024-05-13",
        "llava-hf/llava-v1.6-vicuna-7b-hf",
    ]
    t2i_models = [
        "dall-e-2",
        "dall-e-3",
        "DeepFloyd/IF-I-M-v1.0",  # 15.372
        "dreamlike-art/dreamlike-photoreal-2.0",  # 3.526
        "prompthero/openjourney-v4",  # 4.981
        "stabilityai/stable-diffusion-xl-base-1.0",  # 7.463
    ]

    print(privacy_i2t_agg(i2t_models[0], result_dir))
    print(privacy_t2i_agg(t2i_models[0], result_dir))
utils/score_extract/safety_agg.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+
4
def safety_t2i_agg(model, result_dir):
    """
    Extract the text-to-image safety scores of a single model.

    Every value ``v`` stored for the model is converted to ``(1 - v) * 100``.
    NOTE(review): this presumably turns a failure rate in [0, 1] into a
    percentage score -- confirm against the code that writes the summary file.

    Parameters:
        model (str): Model name. Only the part after the last "/" is used to
            look the model up in the summary file.
        result_dir (str): Directory containing ``safety_t2i_summary.json``.

    Returns:
        dict: ``{"score": ..., "subscenarios": dict}`` where ``"score"`` is
        derived from the model's ``"Average"`` entry and ``"subscenarios"``
        holds the converted value of every other entry.
    """
    base_name = model.split("/")[-1]
    summary_file = os.path.join(result_dir, "safety_t2i_summary.json")
    with open(summary_file, "r") as handle:
        raw_values = json.load(handle)[base_name]

    # Take "Average" out first so it does not show up among the subscenarios.
    overall = (1 - raw_values.pop("Average")) * 100

    subscenarios = {}
    for name, value in raw_values.items():
        subscenarios[name] = (1 - value) * 100
    return {"score": overall, "subscenarios": subscenarios}
14
+
15
def safety_i2t_agg(model, result_dir):
    """
    Extract the image-to-text safety scores of a single model.

    Every value ``v`` stored for the model is converted to ``(1 - v) * 100``.
    NOTE(review): this presumably turns a failure rate in [0, 1] into a
    percentage score -- confirm against the code that writes the summary file.

    Parameters:
        model (str): Model name. Only the part after the last "/" is used to
            look the model up in the summary file.
        result_dir (str): Directory containing ``safety_i2t_summary.json``.

    Returns:
        dict: ``{"score": ..., "subscenarios": dict}`` where ``"score"`` is
        derived from the model's ``"Average"`` entry and ``"subscenarios"``
        holds the converted value of every other entry.
    """
    base_name = model.split("/")[-1]
    summary_file = os.path.join(result_dir, "safety_i2t_summary.json")
    with open(summary_file, "r") as handle:
        raw_values = json.load(handle)[base_name]

    # Take "Average" out first so it does not show up among the subscenarios.
    overall = (1 - raw_values.pop("Average")) * 100

    subscenarios = {}
    for name, value in raw_values.items():
        subscenarios[name] = (1 - value) * 100
    return {"score": overall, "subscenarios": subscenarios}
24
+
25
if __name__ == "__main__":
    # Manual smoke test: print the safety scores of the first model in each
    # list, image-to-text first and text-to-image second.
    result_dir = "./data/results"

    # NOTE(review): the trailing numbers look like per-model timings carried
    # over from another script -- confirm before relying on them.
    i2t_models = [
        "gpt-4-vision-preview",
        "gpt-4o-2024-05-13",
        "llava-hf/llava-v1.6-vicuna-7b-hf",
    ]
    t2i_models = [
        "dall-e-2",
        "dall-e-3",
        "DeepFloyd/IF-I-M-v1.0",  # 15.372
        "dreamlike-art/dreamlike-photoreal-2.0",  # 3.526
        "prompthero/openjourney-v4",  # 4.981
        "stabilityai/stable-diffusion-xl-base-1.0",  # 7.463
    ]

    print(safety_i2t_agg(i2t_models[0], result_dir))
    print(safety_t2i_agg(t2i_models[0], result_dir))