Spaces:
Building
on
A10G
Building
on
A10G
import sys | |
import os | |
import torch | |
from PIL import Image | |
from typing import List | |
import numpy as np | |
from utils import ( | |
tensor_to_pil, | |
pil_to_tensor, | |
pad_image, | |
postprocess_image, | |
preprocess_image, | |
downloadModels, | |
examples, | |
) | |
sys.path.append(os.path.dirname("./ComfyUI/")) | |
from ComfyUI.nodes import ( | |
CheckpointLoaderSimple, | |
VAEDecode, | |
VAEEncode, | |
KSampler, | |
EmptyLatentImage, | |
CLIPTextEncode, | |
) | |
from ComfyUI.comfy_extras.nodes_compositing import JoinImageWithAlpha | |
from ComfyUI.comfy_extras.nodes_mask import InvertMask, MaskToImage | |
from ComfyUI.comfy import samplers | |
from ComfyUI.custom_nodes.layerdiffuse.layered_diffusion import ( | |
LayeredDiffusionFG, | |
LayeredDiffusionDecode, | |
LayeredDiffusionCond, | |
) | |
import gradio as gr | |
from briarmbg import BriaRMBG | |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") | |
downloadModels() | |
with torch.inference_mode(): | |
ckpt_load_checkpoint = CheckpointLoaderSimple().load_checkpoint | |
ckpt = ckpt_load_checkpoint( | |
ckpt_name="juggernautXL_version6Rundiffusion.safetensors" | |
) | |
cliptextencode = CLIPTextEncode().encode | |
emptylatentimage_generate = EmptyLatentImage().generate | |
ksampler_sample = KSampler().sample | |
vae_decode = VAEDecode().decode | |
vae_encode = VAEEncode().encode | |
ld_fg_apply_layered_diffusion = LayeredDiffusionFG().apply_layered_diffusion | |
ld_cond_apply_layered_diffusion = LayeredDiffusionCond().apply_layered_diffusion | |
ld_decode = LayeredDiffusionDecode().decode | |
mask_to_image = MaskToImage().mask_to_image | |
invert_mask = InvertMask().invert | |
join_image_with_alpha = JoinImageWithAlpha().join_image_with_alpha | |
rmbg_model = BriaRMBG.from_pretrained("briaai/RMBG-1.4").to(device) | |
def predict( | |
prompt: str, | |
negative_prompt: str, | |
input_image: Image.Image, | |
remove_bg: bool, | |
cond_mode: str, | |
seed: int, | |
sampler_name: str, | |
scheduler: str, | |
steps: int, | |
cfg: float, | |
denoise: float, | |
): | |
seed = seed if seed != -1 else np.random.randint(0, 2**63 - 1) | |
try: | |
with torch.inference_mode(): | |
cliptextencode_prompt = cliptextencode( | |
text=prompt, | |
clip=ckpt[1], | |
) | |
cliptextencode_negative_prompt = cliptextencode( | |
text=negative_prompt, | |
clip=ckpt[1], | |
) | |
emptylatentimage_sample = emptylatentimage_generate( | |
width=1024, height=1024, batch_size=1 | |
) | |
if input_image is not None: | |
input_image = pad_image(input_image).resize((1024, 1024)) | |
if remove_bg: | |
orig_im_size = input_image.size | |
image = preprocess_image(np.array(input_image), [1024, 1024]).to( | |
device | |
) | |
result = rmbg_model(image) | |
# post process | |
result_mask_image = postprocess_image(result[0][0], orig_im_size) | |
# save result | |
pil_mask = Image.fromarray(result_mask_image) | |
no_bg_image = Image.new("RGBA", pil_mask.size, (0, 0, 0, 0)) | |
no_bg_image.paste(input_image, mask=pil_mask) | |
input_image = no_bg_image | |
img_tensor = pil_to_tensor(input_image) | |
img_latent = vae_encode(pixels=img_tensor[0], vae=ckpt[2]) | |
layereddiffusionapply_sample = ld_cond_apply_layered_diffusion( | |
config=cond_mode, | |
weight=1, | |
model=ckpt[0], | |
cond=cliptextencode_prompt[0], | |
uncond=cliptextencode_negative_prompt[0], | |
latent=img_latent[0], | |
) | |
ksampler = ksampler_sample( | |
steps=steps, | |
cfg=cfg, | |
sampler_name=sampler_name, | |
scheduler=scheduler, | |
seed=seed, | |
model=layereddiffusionapply_sample[0], | |
positive=layereddiffusionapply_sample[1], | |
negative=layereddiffusionapply_sample[2], | |
latent_image=emptylatentimage_sample[0], | |
denoise=denoise, | |
) | |
vaedecode_sample = vae_decode( | |
samples=ksampler[0], | |
vae=ckpt[2], | |
) | |
layereddiffusiondecode_sample = ld_decode( | |
sd_version="SDXL", | |
sub_batch_size=16, | |
samples=ksampler[0], | |
images=vaedecode_sample[0], | |
) | |
rgb_img = tensor_to_pil(vaedecode_sample[0]) | |
return (rgb_img[0], rgb_img[0], seed) | |
else: | |
layereddiffusionapply_sample = ld_fg_apply_layered_diffusion( | |
config="SDXL, Conv Injection", weight=1, model=ckpt[0] | |
) | |
ksampler = ksampler_sample( | |
steps=steps, | |
cfg=cfg, | |
sampler_name=sampler_name, | |
scheduler=scheduler, | |
seed=seed, | |
model=layereddiffusionapply_sample[0], | |
positive=cliptextencode_prompt[0], | |
negative=cliptextencode_negative_prompt[0], | |
latent_image=emptylatentimage_sample[0], | |
denoise=denoise, | |
) | |
vaedecode_sample = vae_decode( | |
samples=ksampler[0], | |
vae=ckpt[2], | |
) | |
layereddiffusiondecode_sample = ld_decode( | |
sd_version="SDXL", | |
sub_batch_size=16, | |
samples=ksampler[0], | |
images=vaedecode_sample[0], | |
) | |
mask = mask_to_image(mask=layereddiffusiondecode_sample[1]) | |
ld_image = tensor_to_pil(layereddiffusiondecode_sample[0][0]) | |
inverted_mask = invert_mask(mask=layereddiffusiondecode_sample[1]) | |
rgba_img = join_image_with_alpha( | |
image=layereddiffusiondecode_sample[0], alpha=inverted_mask[0] | |
) | |
rgba_img = tensor_to_pil(rgba_img[0]) | |
mask = tensor_to_pil(mask[0]) | |
rgb_img = tensor_to_pil(vaedecode_sample[0]) | |
return (rgba_img[0], mask[0], seed) | |
# return flatten([rgba_img, mask, rgb_img, ld_image]) | |
except Exception as e: | |
raise gr.Error(e) | |
def flatten(l: List[List[any]]) -> List[any]: | |
return [item for sublist in l for item in sublist] | |
def predict_examples( | |
prompt, | |
negative_prompt, | |
input_image=None, | |
remove_bg=False, | |
cond_mode=None, | |
seed=-1, | |
cfg=10, | |
): | |
return predict( | |
prompt, | |
negative_prompt, | |
input_image, | |
remove_bg, | |
cond_mode, | |
seed, | |
"dpmpp_2m_sde_gpu", | |
"karras", | |
30, | |
cfg, | |
1.0, | |
) | |
css = """ | |
.gradio-container { max-width: 68rem !important; } | |
""" | |
with gr.Blocks(css=css) as blocks: | |
gr.Markdown("""# LayerDiffuse (unofficial) | |
Using ComfyUI building blocks with custom node by [huchenlei](https://github.com/huchenlei/ComfyUI-layerdiffuse) | |
Models: [LayerDiffusion/layerdiffusion-v1](https://huggingface.co/LayerDiffusion/layerdiffusion-v1/tree/main) | |
Paper: [Transparent Image Layer Diffusion using Latent Transparency](https://huggingface.co/papers/2402.17113) | |
""") | |
with gr.Row(): | |
with gr.Column(): | |
prompt = gr.Text(label="Prompt") | |
negative_prompt = gr.Text(label="Negative Prompt") | |
button = gr.Button("Generate") | |
with gr.Accordion(open=False, label="Input Images (Optional)"): | |
with gr.Group(): | |
cond_mode = gr.Radio( | |
value="SDXL, Foreground", | |
choices=["SDXL, Foreground", "SDXL, Background"], | |
info="Whether to use input image as foreground or background", | |
) | |
remove_bg = gr.Checkbox( | |
info="Remove background using BriaRMBG", | |
label="Remove Background", | |
value=False, | |
) | |
input_image = gr.Image( | |
label="Input Image", | |
type="pil", | |
) | |
with gr.Accordion(open=False, label="Advanced Options"): | |
with gr.Group(): | |
with gr.Row(): | |
seed = gr.Slider( | |
label="Seed", | |
value=-1, | |
minimum=-1, | |
maximum=0xFFFFFFFFFFFFFFFF, | |
step=1, | |
) | |
curr_seed = gr.Number( | |
value=-1, interactive=False, scale=0, label=" " | |
) | |
sampler_name = gr.Dropdown( | |
choices=samplers.KSampler.SAMPLERS, | |
label="Sampler Name", | |
value="dpmpp_2m_sde_gpu", | |
) | |
scheduler = gr.Dropdown( | |
choices=samplers.KSampler.SCHEDULERS, | |
label="Scheduler", | |
value="karras", | |
) | |
steps = gr.Slider( | |
label="Steps", value=20, minimum=1, maximum=50, step=1 | |
) | |
cfg = gr.Number( | |
label="CFG", value=5.0, minimum=0.0, maximum=100.0, step=0.1 | |
) | |
denoise = gr.Number( | |
label="Denoise", value=1.0, minimum=0.0, maximum=1.0, step=0.01 | |
) | |
with gr.Column(): | |
image = gr.Image() | |
with gr.Accordion(label="Mask", open=False): | |
mask = gr.Image() | |
inputs = [ | |
prompt, | |
negative_prompt, | |
input_image, | |
remove_bg, | |
cond_mode, | |
seed, | |
sampler_name, | |
scheduler, | |
steps, | |
cfg, | |
denoise, | |
] | |
outputs = [image, mask, curr_seed] | |
button.click(fn=predict, inputs=inputs, outputs=outputs) | |
gr.Examples( | |
fn=predict_examples, | |
examples=examples, | |
inputs=[ | |
prompt, | |
negative_prompt, | |
input_image, | |
remove_bg, | |
cond_mode, | |
seed, | |
], | |
outputs=outputs, | |
cache_examples=True, | |
) | |
if __name__ == "__main__": | |
blocks.queue(api_open=False) | |
blocks.launch(show_api=False) | |