radames committed on
Commit
177316b
1 Parent(s): 0d3bfb7
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.png filter=lfs diff=lfs merge=lfs -text
37
+ *.whl filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ __pycache__/
2
+ venv/
3
+ public/
4
+ *.pem
README.md CHANGED
@@ -1,12 +1,13 @@
1
  ---
2
  title: Enhance This HiDiffusion SDXL
3
- emoji: 🌖
4
  colorFrom: pink
5
  colorTo: pink
6
  sdk: gradio
7
  sdk_version: 4.29.0
8
  app_file: app.py
9
  pinned: false
 
 
 
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: Enhance This HiDiffusion SDXL
3
+ emoji: 🔍🕵️
4
  colorFrom: pink
5
  colorTo: pink
6
  sdk: gradio
7
  sdk_version: 4.29.0
8
  app_file: app.py
9
  pinned: false
10
+ suggested_hardware: t4-medium
11
+ disable_embedding: true
12
+ short_description: Creative Upscaler High-Res Image Generation HiDiffusion SDXL
13
  ---
 
 
app.py ADDED
@@ -0,0 +1,313 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spaces
2
+ import gradio as gr
3
+ from gradio_imageslider import ImageSlider
4
+ import torch
5
+ from hidiffusion import apply_hidiffusion
6
+ from diffusers import (
7
+ ControlNetModel,
8
+ StableDiffusionXLControlNetImg2ImgPipeline,
9
+ DDIMScheduler,
10
+ )
11
+
12
+ from compel import Compel, ReturnedEmbeddingsType
13
+ from PIL import Image
14
+ import os
15
+ import time
16
+ import cv2
17
+ import numpy as np
18
+
19
+
20
# ---------------------------------------------------------------------------
# Model setup: executed once at import time, shared by every request.
# ---------------------------------------------------------------------------

# Prefer the GPU; the CPU branch is a fallback for machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Half precision is only used together with CUDA; on the CPU fallback the
# weights are loaded as float32 so the pipeline stays runnable there.
dtype = torch.float16 if device == "cuda" else torch.float32

# Opt-in flag read from the environment ("1" enables it). It is only logged
# here; nothing else in the visible code consults it.
LOW_MEMORY = os.getenv("LOW_MEMORY", "0") == "1"

print(f"device: {device}")
print(f"dtype: {dtype}")
print(f"low memory: {LOW_MEMORY}")


model = "stabilityai/stable-diffusion-xl-base-1.0"
# vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=dtype)
scheduler = DDIMScheduler.from_pretrained(model, subfolder="scheduler")
# Canny-edge ControlNet, loaded with the same dtype as the main pipeline so
# both halves always agree on precision.
controlnet = ControlNetModel.from_pretrained(
    "diffusers/controlnet-canny-sdxl-1.0", torch_dtype=dtype
)
pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
    model,
    controlnet=controlnet,
    torch_dtype=dtype,
    variant="fp16",
    use_safetensors=True,
    scheduler=scheduler,
)
# xformers memory-efficient attention is a GPU feature; enabling it without
# CUDA raises at startup, which would defeat the CPU fallback above.
if device == "cuda":
    pipe.enable_xformers_memory_efficient_attention()
# pipe.enable_model_cpu_offload()
pipe.enable_vae_tiling()
# Patch the pipeline with HiDiffusion after the attention/VAE options are set
# (same order as before).
apply_hidiffusion(pipe)

# Compel turns weighted prompt syntax into embeddings. It is given both SDXL
# tokenizers/text encoders; only the second encoder supplies a pooled output.
compel = Compel(
    tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
    text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
    returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
    requires_pooled=[False, True],
)
pipe = pipe.to(device)
56
+
57
+
58
def pad_image(image):
    """Return *image* centred on a black square canvas.

    The canvas side equals the longer edge of the input, so the picture is
    never scaled or cropped; only the shorter axis receives padding.

    Args:
        image: A PIL image.

    Returns:
        The original object, untouched, when it is already square; otherwise
        a new image of the same mode with the input pasted in the middle.
    """
    width, height = image.size
    if width == height:
        # Already square: hand back the very same object, no copy is made.
        return image

    side = max(width, height)
    # The longer axis gets offset 0; on the shorter axis floor division
    # leaves any odd leftover pixel on the bottom/right edge.
    offset = ((side - width) // 2, (side - height) // 2)
    canvas = Image.new(image.mode, (side, side), (0, 0, 0))
    canvas.paste(image, offset)
    return canvas
74
+
75
+
76
@spaces.GPU
def predict(
    input_image,
    prompt,
    negative_prompt,
    seed,
    controlnet_conditioning_scale,
    guidance_scale=8.5,
    scale=2,
    strength=1.0,
    controlnet_start=0.0,
    controlnet_end=1.0,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate an enlarged, re-imagined version of ``input_image``.

    The input is padded to a square, resized to 1024x1024 and used both as
    the img2img source and, through a Canny edge map, as the ControlNet
    condition.

    Args:
        input_image: PIL image from the UI, or ``None`` when nothing was
            uploaded.
        prompt: Positive prompt (passed through Compel).
        negative_prompt: Negative prompt (passed through Compel).
        seed: Seed for the torch random generator.
        controlnet_conditioning_scale: Weight of the ControlNet condition.
        guidance_scale: Classifier-free guidance scale.
        scale: Magnification factor; the output side is ``1024 * scale``
            pixels, rounded down to a multiple of 8.
        strength: img2img strength.
        controlnet_start: Fraction of the steps at which ControlNet guidance
            starts.
        controlnet_end: Fraction of the steps at which ControlNet guidance
            stops.
        progress: Gradio progress tracker; unused directly, it mirrors the
            pipeline's tqdm bar in the UI.

    Returns:
        A ``(padded_input, generated_image)`` tuple of PIL images.

    Raises:
        gr.Error: If no input image was provided.
    """
    if input_image is None:
        raise gr.Error("Please upload an image.")
    padded_image = pad_image(input_image).resize((1024, 1024)).convert("RGB")
    # One Compel call for both prompts: row 0 is the positive prompt and
    # row 1 the negative one.
    conditioning, pooled = compel([prompt, negative_prompt])
    generator = torch.manual_seed(seed)

    # Pixel sizes must be integers. A fractional ``scale`` would otherwise
    # yield a float side length; rounding down to a multiple of 8 keeps the
    # size compatible with the 8x-downsampled latent grid.
    side = int(1024 * scale)
    side -= side % 8

    last_time = time.time()
    # Canny yields a single-channel edge map; replicate it to three channels
    # so it can be handed over as an RGB control image.
    edges = cv2.Canny(np.array(padded_image), 100, 200)
    canny_image = Image.fromarray(np.stack([edges, edges, edges], axis=2))
    images = pipe(
        image=padded_image,
        control_image=canny_image,
        strength=strength,
        prompt_embeds=conditioning[0:1],
        pooled_prompt_embeds=pooled[0:1],
        negative_prompt_embeds=conditioning[1:2],
        negative_pooled_prompt_embeds=pooled[1:2],
        width=side,
        height=side,
        controlnet_conditioning_scale=controlnet_conditioning_scale,
        # The pipeline calls these ``control_guidance_*``. Under the old
        # ``controlnet_start``/``controlnet_end`` keywords the values were
        # swallowed by ``**kwargs`` and had no effect.
        control_guidance_start=controlnet_start,
        control_guidance_end=controlnet_end,
        generator=generator,
        num_inference_steps=40,
        guidance_scale=guidance_scale,
        eta=1.0,
    )
    print(f"Time taken: {time.time() - last_time}")
    return (padded_image, images.images[0])
121
+
122
+
123
# Custom CSS for the intro block. The declarations are prefixed with "#",
# which leaves them inactive; the text is kept as-is.
css = """
#intro{
    # max-width: 32rem;
    # text-align: center;
    # margin: 0 auto;
}
"""

# ---------------------------------------------------------------------------
# Gradio UI: inputs on the left, before/after slider on the right.
# ---------------------------------------------------------------------------
with gr.Blocks(css=css) as demo:
    # The intro names HiDiffusion, the method this app actually applies
    # (see ``apply_hidiffusion``).
    # NOTE(review): the GPU timings quoted below were kept unchanged and were
    # presumably measured with an earlier variant of this demo; re-measure.
    gr.Markdown(
        """
# Enhance This
### HiDiffusion SDXL

[HiDiffusion](https://github.com/megvii-research/HiDiffusion) enables higher-resolution image generation.
You can upload an initial image and prompt to generate an enhanced version.
[Duplicate Space](https://huggingface.co/spaces/radames/Enhance-This-HiDiffusion-SDXL?duplicate=true) to avoid the queue.
GPU Time Comparison: T4: ~276s - A10G: ~113.6s A100: ~43.5s RTX 4090: ~48.1s

<small>
<b>Notes</b> The author advises against the term "super resolution" because it's more like image-to-image generation than enhancement, but it's still a lot of fun!

</small>
""",
        elem_id="intro",
    )
    with gr.Row():
        with gr.Column(scale=1):
            image_input = gr.Image(type="pil", label="Input Image")
            prompt = gr.Textbox(
                label="Prompt",
                info="The prompt is very important to get the desired results. Please try to describe the image as best as you can. Accepts Compel Syntax",
            )
            negative_prompt = gr.Textbox(
                label="Negative Prompt",
                value="blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
            )
            seed = gr.Slider(
                minimum=0,
                maximum=2**64 - 1,
                value=1415926535897932,
                step=1,
                label="Seed",
                randomize=True,
            )
            with gr.Accordion(label="Advanced", open=False):
                guidance_scale = gr.Slider(
                    minimum=0,
                    maximum=50,
                    value=8.5,
                    step=0.001,
                    label="Guidance Scale",
                )
                scale = gr.Slider(
                    minimum=1,
                    maximum=5,
                    value=2,
                    step=1,
                    label="Magnification Scale",
                    # interactive=False,
                )
                controlnet_conditioning_scale = gr.Slider(
                    minimum=0,
                    maximum=1,
                    step=0.001,
                    value=0.5,
                    label="ControlNet Conditioning Scale",
                )
                # img2img strength is only valid in [0, 1]; the pipeline
                # rejects larger values, so the slider stops at 1.
                strength = gr.Slider(
                    minimum=0,
                    maximum=1,
                    step=0.001,
                    value=1,
                    label="Strength",
                )
                controlnet_start = gr.Slider(
                    minimum=0,
                    maximum=1,
                    step=0.001,
                    value=0.0,
                    label="ControlNet Start",
                )
                controlnet_end = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.001,
                    value=1.0,
                    label="ControlNet End",
                )

            btn = gr.Button()
        with gr.Column(scale=2):
            image_slider = ImageSlider(position=0.5)

    # Order must match the positional parameters of ``predict``.
    inputs = [
        image_input,
        prompt,
        negative_prompt,
        seed,
        controlnet_conditioning_scale,
        guidance_scale,
        scale,
        strength,
        controlnet_start,
        controlnet_end,
    ]
    outputs = [image_slider]
    btn.click(predict, inputs=inputs, outputs=outputs, concurrency_limit=1)

    # Each example row follows the same column order as ``inputs``.
    gr.Examples(
        fn=predict,
        examples=[
            [
                "./examples/lara.jpeg",
                "photography of lara croft 8k high definition award winning",
                "blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
                5436236241,
                0.5,
                8.5,
                3,
                0.8,
                0.0,
                1.0,
            ],
            [
                "./examples/cybetruck.jpeg",
                "photo of tesla cybertruck futuristic car 8k high definition on a sand dune in mars, future",
                "blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
                383472451451,
                0.5,
                8.5,
                3,
                0.8,
                0.0,
                1.0,
            ],
            [
                "./examples/jesus.png",
                "a photorealistic painting of Jesus Christ, 4k high definition",
                "blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
                13317204146129588000,
                0.5,
                8.5,
                3,
                0.8,
                0.0,
                1.0,
            ],
            [
                "./examples/anna-sullivan-DioLM8ViiO8-unsplash.jpg",
                "A crowded stadium with enthusiastic fans watching a daytime sporting event, the stands filled with colorful attire and the sun casting a warm glow",
                "blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
                5623124123512,
                0.5,
                8.5,
                3,
                0.8,
                0.0,
                1.0,
            ],
            [
                "./examples/img_aef651cb-2919-499d-aa49-6d4e2e21a56e_1024.jpg",
                "a large red flower on a black background 4k high definition",
                "blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
                23123412341234,
                0.5,
                8.5,
                3,
                0.8,
                0.0,
                1.0,
            ],
            [
                "./examples/huggingface.jpg",
                "photo realistic huggingface human+++ emoji costume, round, yellow, skin+++ texture+++",
                "blurry, ugly, duplicate, poorly drawn, deformed, mosaic, emoji cartoon, drawing, pixelated",
                5532144938416372000,
                0.101,
                25.206,
                # Was 4.64, a value the integer-step Magnification slider
                # cannot represent and that gives a non-integer output size.
                # NOTE(review): 4 chosen as a whole-number stand-in; confirm.
                4,
                0.8,
                0.0,
                1.0,
            ],
        ],
        inputs=inputs,
        outputs=outputs,
        cache_examples="lazy",
    )


demo.queue(api_open=False)
demo.launch(show_api=False)
examples/anna-sullivan-DioLM8ViiO8-unsplash.jpg ADDED
examples/cybetruck.jpeg ADDED
examples/huggingface.jpg ADDED
examples/img_aef651cb-2919-499d-aa49-6d4e2e21a56e_1024.jpg ADDED
examples/jesus.png ADDED

Git LFS Details

  • SHA256: edd3a002427c6e450ac0e7d63c31f80327f3b8030d64190207fb4af826f2439b
  • Pointer size: 131 Bytes
  • Size of remote file: 179 kB
examples/lara.jpeg ADDED
requirements.txt ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio==4.29.0
2
+ accelerate
3
+ transformers
4
+ torch==2.2.2
5
+ torchvision
6
+ xformers
7
+ accelerate
8
+ invisible-watermark
9
+ huggingface-hub
10
+ hf-transfer
11
+ gradio_imageslider==0.0.20
12
+ compel
13
+ opencv-python
14
+ numpy
15
+ diffusers==0.27.0
16
+ transformers
17
+ accelerate
18
+ safetensors
19
+ hidiffusion==0.1.8
20
+ spaces
21
+ torch