Long generation time with the Google Colab code compared to diffusers
Code from Google Colab for text-to-image generation:
```python
import torch
import torchvision.transforms as T
import yaml
from tqdm import tqdm

# StableCascade repo imports (as used in the official inference notebooks)
from inference.utils import calculate_latent_sizes
from train import WurstCoreB, WurstCoreC


class ImageGenerator:
    def __init__(self):
        self.device = torch.device("cuda:2" if torch.cuda.is_available() else "cpu")
        self.config_file_c = '/StableCascade/configs/inference/stage_c_3b.yaml'
        self.config_file_b = '/StableCascade/configs/inference/stage_b_3b.yaml'
        self.setup_models()

    def setup_models(self):
        # SETUP STAGE C
        with open(self.config_file_c, "r", encoding="utf-8") as file:
            loaded_config = yaml.safe_load(file)
        self.core = WurstCoreC(config_dict=loaded_config, device=self.device, training=False)
        self.extras = self.core.setup_extras_pre()
        models = self.core.setup_models(self.extras)
        models.generator.eval().requires_grad_(False)
        print("STAGE C READY")

        # SETUP STAGE B (reuses the Stage C tokenizer and text model)
        with open(self.config_file_b, "r", encoding="utf-8") as file:
            config_file_b = yaml.safe_load(file)
        self.core_b = WurstCoreB(config_dict=config_file_b, device=self.device, training=False)
        self.extras_b = self.core_b.setup_extras_pre()
        models_b = self.core_b.setup_models(self.extras_b, skip_clip=True)
        models_b = WurstCoreB.Models(
            **{**models_b.to_dict(), 'tokenizer': models.tokenizer, 'text_model': models.text_model}
        )
        models_b.generator.bfloat16().eval().requires_grad_(False)
        print("STAGE B READY")

        # Compile both generators
        models = WurstCoreC.Models(
            **{**models.to_dict(), 'generator': torch.compile(models.generator, mode="reduce-overhead", fullgraph=True)}
        )
        models_b = WurstCoreB.Models(
            **{**models_b.to_dict(), 'generator': torch.compile(models_b.generator, mode="reduce-overhead", fullgraph=True)}
        )
        self.models = models
        self.models_b = models_b

    def generate_and_save(self, caption, batch_size=1, height=1024, width=1024):
        # PREPARE CONDITIONS
        stage_c_latent_shape, stage_b_latent_shape = calculate_latent_sizes(height, width, batch_size=batch_size)

        # Stage C parameters
        self.extras.sampling_configs['cfg'] = 4
        self.extras.sampling_configs['shift'] = 2
        self.extras.sampling_configs['timesteps'] = 20
        self.extras.sampling_configs['t_start'] = 1.0

        # Stage B parameters
        self.extras_b.sampling_configs['cfg'] = 1.1
        self.extras_b.sampling_configs['shift'] = 1
        self.extras_b.sampling_configs['timesteps'] = 10
        self.extras_b.sampling_configs['t_start'] = 1.0

        batch = {'captions': [caption] * batch_size}
        conditions = self.core.get_conditions(batch, self.models, self.extras, is_eval=True, is_unconditional=False, eval_image_embeds=False)
        unconditions = self.core.get_conditions(batch, self.models, self.extras, is_eval=True, is_unconditional=True, eval_image_embeds=False)
        conditions_b = self.core_b.get_conditions(batch, self.models_b, self.extras_b, is_eval=True, is_unconditional=False)
        unconditions_b = self.core_b.get_conditions(batch, self.models_b, self.extras_b, is_eval=True, is_unconditional=True)

        with torch.no_grad(), torch.cuda.amp.autocast(dtype=torch.bfloat16):
            # Stage C sampling: iterate the sampler, keeping the final latents
            sampling_c = self.extras.gdf.sample(
                self.models.generator, conditions, stage_c_latent_shape,
                unconditions, device=self.device, **self.extras.sampling_configs,
            )
            for (sampled_c, _, _) in tqdm(sampling_c, total=self.extras.sampling_configs['timesteps']):
                pass

            # Stage B sampling, conditioned on the Stage C latents
            conditions_b['effnet'] = sampled_c
            unconditions_b['effnet'] = torch.zeros_like(sampled_c)
            sampling_b = self.extras_b.gdf.sample(
                self.models_b.generator, conditions_b, stage_b_latent_shape,
                unconditions_b, device=self.device, **self.extras_b.sampling_configs
            )
            for (sampled_b, _, _) in tqdm(sampling_b, total=self.extras_b.sampling_configs['timesteps']):
                pass

            # Decode to pixel space with Stage A and convert to a PIL image
            sampled = self.models_b.stage_a.decode(sampled_b).float()
            sampled_image = T.ToPILImage()(sampled[0].cpu().detach().clamp_(0, 1))
            return sampled_image
```
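A minimal usage sketch of the class above (the prompt string is just an example):

```python
# Hypothetical usage of ImageGenerator as defined above
generator = ImageGenerator()
image = generator.generate_and_save("a photo of an astronaut riding a horse")
image.save("output.png")
```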
Generating an image with the Google Colab code takes about 12 minutes, while the same generation with diffusers takes about 30 seconds.
What could be the reason for such a long generation time?
I use:
- Stage C (big): model_version 3.6B, dtype bfloat16, stage_c_bf16.safetensors
- Stage B (big): model_version 3B, dtype bfloat16, stage_b_bf16.safetensors
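For reference, a sketch of the diffusers path I am comparing against (it assumes the standard StableCascadePriorPipeline / StableCascadeDecoderPipeline API and the stabilityai/stable-cascade checkpoints from recent diffusers releases; my real script may differ in details):

```python
import torch
from diffusers import StableCascadePriorPipeline, StableCascadeDecoderPipeline

prompt = "a photo of an astronaut riding a horse"

# Stage C (prior) and Stage B+A (decoder) pipelines, both in bfloat16
prior = StableCascadePriorPipeline.from_pretrained(
    "stabilityai/stable-cascade-prior", variant="bf16", torch_dtype=torch.bfloat16
).to("cuda")
decoder = StableCascadeDecoderPipeline.from_pretrained(
    "stabilityai/stable-cascade", variant="bf16", torch_dtype=torch.bfloat16
).to("cuda")

prior_output = prior(
    prompt=prompt, height=1024, width=1024,
    guidance_scale=4.0, num_inference_steps=20,
)
image = decoder(
    image_embeddings=prior_output.image_embeddings,
    prompt=prompt, guidance_scale=0.0, num_inference_steps=10,
).images[0]
image.save("diffusers_output.png")
```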
self.device = torch.device("cuda:2" if torch.cuda.is_available() else "cpu")
I'm not sure about the device numbering on Colab try
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
Thanks for answering!
Yes, it is right: "cuda:2" is the name of my device, and I can see the GPU filling up when I run the code. (I'm running the code on my local machine, not on Colab.)
Does anyone have a similar problem? Or do you all have about the same execution time?
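If anyone wants to compare numbers, here is a quick timing sketch against the ImageGenerator class above (the prompt is just an example; the first call includes any torch.compile warm-up, so later runs show the steady-state speed):

```python
import time

generator = ImageGenerator()

for i in range(3):
    start = time.perf_counter()
    generator.generate_and_save("a photo of an astronaut riding a horse")
    # Run 0 includes torch.compile warm-up; runs 1-2 measure steady-state sampling
    print(f"run {i}: {time.perf_counter() - start:.1f} s")
```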