Finish setup for initial pipeline
pipeline.py  CHANGED  (+16 -62)
@@ -71,7 +71,7 @@ if is_torch_xla_available():
     XLA_AVAILABLE = True
 else:
     XLA_AVAILABLE = False
-
+
 
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
@@ -79,13 +79,18 @@ EXAMPLE_DOC_STRING = """
     Examples:
         ```py
         >>> import torch
-        >>> from diffusers import
-        >>>
-
+        >>> from diffusers import StableDiffusionXLImg2ImgPipeline, AutoencoderKL
+        >>> from diffusers.utils import load_image
+        >>> vae = AutoencoderKL.from_pretrained(
+        ...     "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
+        ... )
+        >>> pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
+        ...     "stabilityai/stable-diffusion-xl-base-1.0", custom_pipeline="jyoung105/sdxl_perturbed_attention_guidance_i2i", vae=vae, torch_dtype=torch.float16
         ... )
         >>> pipe = pipe.to("cuda")
         >>> prompt = "a photo of an astronaut riding a horse on mars"
-        >>>
+        >>> init_image = load_image("")
+        >>> image = pipe(prompt, image=init_image, strength=0.9, pag_scale=3.0, pag_applied_layers=['mid']).images[0]
         ```
 """
 
@@ -315,37 +320,6 @@ class PAGCFGIdentitySelfAttnProcessor:
 
         return hidden_states
 
-if is_invisible_watermark_available():
-    from diffusers.pipelines.stable_diffusion_xl.watermark import StableDiffusionXLWatermarker
-
-if is_torch_xla_available():
-    import torch_xla.core.xla_model as xm
-
-    XLA_AVAILABLE = True
-else:
-    XLA_AVAILABLE = False
-
-logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
-
-EXAMPLE_DOC_STRING = """
-    Examples:
-        ```py
-        >>> import torch
-        >>> from diffusers import StableDiffusionXLImg2ImgPipeline
-        >>> from diffusers.utils import load_image
-
-        >>> pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
-        ...     "stabilityai/stable-diffusion-xl-base-1.0", custom_pipeline="jyoung105/sdxl_perturbed_attention_guidance_i2i", torch_dtype=torch.float16,
-        ... )
-        >>> pipe = pipe.to("cuda")
-        >>> url = "https://huggingface.co/datasets/patrickvonplaten/images/resolve/main/aa_xl/000000009.png"
-
-        >>> init_image = load_image(url).convert("RGB")
-        >>> prompt = "a photo of an astronaut riding a horse on mars"
-        >>> image = pipe(prompt, image=init_image, pag_scale=3.0, pag_applied_layers=['mid']).images[0]
-        ```
-"""
-
 
 # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
 def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
@@ -767,7 +741,7 @@ class StableDiffusionXLImg2ImgPipeline(
         unscale_lora_layers(self.text_encoder_2, lora_scale)
 
         return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds
-
+
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
     def prepare_extra_step_kwargs(self, generator, eta):
         # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
@@ -923,26 +897,6 @@ class StableDiffusionXLImg2ImgPipeline(
             return timesteps, num_inference_steps
 
         return timesteps, num_inference_steps - t_start
-
-    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
-    # def prepare_latents(
-    #     self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None
-    # ):
-    #     shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
-    #     if isinstance(generator, list) and len(generator) != batch_size:
-    #         raise ValueError(
-    #             f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
-    #             f" size of {batch_size}. Make sure the batch size matches the length of the generators."
-    #         )
-
-    #     if latents is None:
-    #         latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
-    #     else:
-    #         latents = latents.to(device)
-
-    #     # scale the initial noise by the standard deviation required by the scheduler
-    #     latents = latents * self.scheduler.init_noise_sigma
-    #     return latents
 
     def prepare_latents(
         self, image, timestep, batch_size, num_images_per_prompt, dtype, device, generator=None, add_noise=True
@@ -1259,14 +1213,14 @@ class StableDiffusionXLImg2ImgPipeline(
     def cross_attention_kwargs(self):
         return self._cross_attention_kwargs
 
-    @property
-    def denoising_end(self):
-        return self._denoising_end
-
     @property
     def denoising_start(self):
        return self._denoising_start
 
+    @property
+    def denoising_end(self):
+        return self._denoising_end
+
     @property
     def num_timesteps(self):
         return self._num_timesteps
@@ -1547,8 +1501,8 @@ class StableDiffusionXLImg2ImgPipeline(
         self._guidance_rescale = guidance_rescale
         self._clip_skip = clip_skip
         self._cross_attention_kwargs = cross_attention_kwargs
-        self._denoising_end = denoising_end
         self._denoising_start = denoising_start
+        self._denoising_end = denoising_end
         self._interrupt = False
 
         self._pag_scale = pag_scale
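For convenience, here is the updated usage example from this commit written out as a plain script. It is a minimal sketch assembled from the two docstrings in the diff: the init image URL is reused from the removed (pre-commit) docstring, since the new docstring leaves it blank, and the output filename is made up for illustration.

```py
import torch
from diffusers import AutoencoderKL, StableDiffusionXLImg2ImgPipeline
from diffusers.utils import load_image

# fp16-safe SDXL VAE referenced by the updated example docstring
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

# Load SDXL base weights through the community img2img PAG pipeline this commit sets up
pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    custom_pipeline="jyoung105/sdxl_perturbed_attention_guidance_i2i",
    vae=vae,
    torch_dtype=torch.float16,
).to("cuda")

# Init image URL reused from the removed docstring; any RGB image works here
url = "https://huggingface.co/datasets/patrickvonplaten/images/resolve/main/aa_xl/000000009.png"
init_image = load_image(url).convert("RGB")

prompt = "a photo of an astronaut riding a horse on mars"
image = pipe(
    prompt,
    image=init_image,
    strength=0.9,
    pag_scale=3.0,
    pag_applied_layers=["mid"],
).images[0]
image.save("astronaut_pag_i2i.png")  # hypothetical output path
```

The `madebyollin/sdxl-vae-fp16-fix` checkpoint is a VAE variant that stays numerically stable in fp16, which appears to be why the updated docstring now threads `vae=vae` into `from_pretrained` alongside `torch_dtype=torch.float16`.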