import gradio as gr
import torch
from PIL import Image, ImageDraw, ImageFont, ImageOps, ImageEnhance
from quanto import qfloat8, quantize, freeze
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
from diffusers.utils import make_image_grid

# Font used to rasterize the mask text (loaded once at import time, size 50).
atkbold = ImageFont.truetype("Atkinson-Hyperlegible-Bold-102.otf", 50)

# Defaults shared by the mask renderer and the UI controls.
default_width = 1920
default_height = 1080
default_timesteps = 8


def mask_image_factory(mask_text="ASK FOR\nA SNACK", width=default_width, height=default_height):
    """Render ``mask_text`` as a white-on-black "L" image of ``width`` x ``height``.

    The text is drawn with the module-level ``atkbold`` font, cropped to its
    bounding box, centered on a black canvas that has the target aspect
    ratio, and finally resampled to exactly ``(width, height)``.

    Args:
        mask_text: Text to draw; may contain newlines (lines are centered).
        width: Output width in pixels (coerced to ``int``).
        height: Output height in pixels (coerced to ``int``).

    Returns:
        A ``PIL.Image.Image`` in mode "L" of size ``(width, height)``.
    """
    # UI number widgets may hand over floats; PIL sizes must be integers.
    width, height = int(width), int(height)

    img = Image.new("L", (width, height), (0,))
    draw = ImageDraw.Draw(img)
    draw.multiline_text(
        xy=(0, 0),
        text=mask_text,
        fill=(255,),
        font=atkbold,
        align="center",
        spacing=0,
    )

    # getbbox() is None when nothing was drawn (empty / whitespace-only text);
    # in that case keep the whole blank canvas.
    bbox = img.getbbox()
    cropped = img.crop(bbox) if bbox else img
    text_width, text_height = cropped.size

    # Calculate aspect ratios
    image_aspect_ratio = width / height
    cropped_aspect_ratio = text_width / text_height

    # Grow the canvas around the text so it reaches the target aspect ratio.
    # The canvas is never smaller than the text in either dimension, so the
    # padding step only adds black borders and never downsamples the glyphs
    # (a shrinking canvas could also collapse narrow text to zero pixels).
    if cropped_aspect_ratio > image_aspect_ratio:
        # Text is relatively wider than the target: add rows above/below.
        new_width = text_width
        new_height = max(text_height, round(text_width / image_aspect_ratio))
    else:
        # Text is relatively taller than (or equal to) the target: add columns.
        new_width = max(text_width, round(text_height * image_aspect_ratio))
        new_height = text_height

    # Pad the image to the desired aspect ratio
    padded = ImageOps.pad(cropped, (new_width, new_height))
    resized = padded.resize((width, height), resample=Image.Resampling.LANCZOS)
    return resized


# Prefer CUDA, then Apple MPS, and fall back to the CPU.
preferred_device = "cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_available() else "cpu")
# preferred_device = "cpu"
preferred_dtype = torch.float32

controlnet = ControlNetModel.from_pretrained(
    "monster-labs/control_v1p_sd15_qrcode_monster",
    # "monster-labs/control_v1p_sdxl_qrcode_monster",
    subfolder="v2",
    torch_dtype=preferred_dtype,
    #torch_dtype=unet_preferred_dtype
).to(preferred_device)

#quantize(controlnet, weights=qfloat8)
#freeze(controlnet)

ctlpipe = StableDiffusionControlNetPipeline.from_pretrained(
    "SimianLuo/LCM_Dreamshaper_v7",
    controlnet=controlnet,
    torch_dtype=preferred_dtype,
    safety_checker=None,
).to(preferred_device)
# Optional quantization of the pipeline sub-models (currently disabled).
#quantize(ctlpipe.unet, weights=qfloat8)
#freeze(ctlpipe.unet)
#quantize(ctlpipe.text_encoder, weights=qfloat8)
#freeze(ctlpipe.text_encoder)


def _grid_shape(count):
    """Return ``(cols, rows)`` used to tile ``count`` images into one grid."""
    if count == 1:
        return 1, 1
    if count == 2:
        # Two images are stacked vertically rather than side by side.
        return 1, 2
    cols = 2 if count % 2 == 0 else 1
    return cols, count // cols


def app(prompt, negative_prompt, mask_text, num_inference_steps, controlnet_conditioning_scale, width, height, seed, count):
    """Generate ``count`` images conditioned on a text mask and tile them.

    Each image is produced by ``ctlpipe`` with the rendered ``mask_text`` as
    the ControlNet input image, a fixed ``guidance_scale`` of 8.0, and seeds
    ``seed``, ``seed + 1``, ... for consecutive images.

    Args:
        prompt: Positive text prompt.
        negative_prompt: Negative text prompt.
        mask_text: Text rendered into the conditioning mask.
        num_inference_steps: Number of denoising steps (coerced to ``int``).
        controlnet_conditioning_scale: ControlNet strength (coerced to ``float``).
        width: Output width in pixels (coerced to ``int``).
        height: Output height in pixels (coerced to ``int``).
        seed: Seed for the first image (coerced to ``int``).
        count: Number of images to generate (coerced to ``int``).

    Returns:
        A single image containing all generated images arranged in a grid.

    Raises:
        ValueError: If ``count`` is less than 1.
    """
    # UI widgets may deliver numbers as floats; normalize once up front.
    width, height = int(width), int(height)
    seed, count = int(seed), int(count)
    if count < 1:
        raise ValueError(f"count must be at least 1, got {count}")

    # The mask depends only on the text and size, so render it once instead
    # of once per generated image.
    mask = mask_image_factory(mask_text=mask_text, width=width, height=height)

    all_images = [
        ctlpipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image=mask,
            num_inference_steps=int(num_inference_steps),
            guidance_scale=8.0,
            controlnet_conditioning_scale=float(controlnet_conditioning_scale),
            generator=torch.manual_seed(seed + i),
            height=height,
            width=width,
        ).images[0]
        for i in range(count)
    ]

    cols, rows = _grid_shape(count)
    return make_image_grid(all_images, cols=cols, rows=rows)


# Single one-step run at import time (result discarded) -- presumably a
# warm-up / smoke test of the pipeline. Keyword arguments keep width and
# height from being swapped.
app(
    prompt="corgis running in the park",
    negative_prompt="ugly, wrong",
    mask_text="ASK FOR\nA SNACK",
    num_inference_steps=1,
    controlnet_conditioning_scale=1.0,
    width=default_width,
    height=default_height,
    seed=42,
    count=1,
)

# Input components are listed in the same order as app()'s parameters.
iface = gr.Interface(
    app,
    [
        gr.Textbox(label="Prompt", value="large happy dogs frolic on the seashore, tall cliffs, sun at the horizon, boulders, tide pools, golden hour"),
        gr.Textbox(label="Negative Prompt", value="ugly, wrong"),
        gr.Textbox(label="Mask Text", value="ASK FOR\nA SNACK"),
        gr.Number(label="Number of Inference Steps", value=default_timesteps, minimum=1, maximum=50, step=1),
        gr.Slider(label="ControlNet Conditioning Scale", value=0.6, minimum=-1.0, maximum=2.0, step=0.01),
        gr.Number(label="Width", value=default_width, minimum=256, maximum=2048, precision=0),
        gr.Number(label="Height", value=default_height, minimum=256, maximum=2048, precision=0),
        gr.Number(label="Random Number Seed", value=42, minimum=0, maximum=2**32-1, precision=0),
        gr.Radio(label="Number of Images to Generate with Subsequent Consecutive Seeds", choices=[1, 2, 4, 6, 10], value=2),
    ],
    "image",
)

iface.launch()