"""Gradio front-end for StyleFeatureEditor.

The UI collects an input image and editing parameters, forwards them over gRPC
to the inference server whose address is read from the ``SERVER`` environment
variable, and displays the images returned by the server.
"""

import os
from io import BytesIO

import gradio as gr
import grpc
from PIL import Image
import pandas as pd

from inference_pb2 import SFERequest, SFEResponse, SFERequestMask, SFEResponseMask
from inference_pb2_grpc import SFEServiceStub


# UI direction name -> (original_range, is_reversed).
# ``denormalize_power`` rescales the slider value so that +15 maps onto
# ``abs(original_range[1])`` and -15 onto ``-abs(original_range[0])``;
# ``is_reversed`` flips the sign of the rescaled value.
PREDEFINED_EDITINGS_DATA = {
    "glasses": ([-20.0, 30.0], False),
    "smile": ([-10.0, 10.0], False),
    "makeup": ([-10.0, 15.0], False),
    "eye_openness": ([-45.0, 30.0], True),
    "trimmed_beard": ([-30.0, 30.0], True),
    "face_roundness": ([-20.0, 15.0], False),
    "nose_length": ([-30.0, 30.0], True),
    "eyebrow_thickness": ([-20.0, 20.0], True),
    "displeased": ([-10.0, 10.0], False),
    "age": ([-10.0, 10.0], False),
    "rotation": ([-7.0, 7.0], False),
    "afro": ([0, 0.14], False),
    "angry": ([0, 0.14], False),
    "bobcut": ([0, 0.18], False),
    "bowlcut": ([0, 0.14], False),
    "mohawk": ([0, 0.1], False),
    "curly_hair": ([0, 0.12], False),
    "purple_hair": ([0, 0.12], False),
    "surprised": ([0, 0.1], False),
    "beyonce": ([0, 0.12], False),
    "hilary_clinton": ([0, 0.1], False),
    "depp": ([0, 0.12], False),
    "taylor_swift": ([0, 0.1], False),
    "trump": ([0, 0.1], False),
    "zuckerberg": ([0, 0.1], False),
    "black hair": ([-7.0, 10.0], False),
    "blond hair": ([-7.0, 10.0], True),
    "grey hair": ([-7.0, 7.0], True),
    "wavy hair": ([-7.0, 7.0], False),
    "receding hairline": ([-10.0, 10.0], True),
    "sideburns": ([-7.0, 7.0], True),
    "goatee": ([-7.0, 7.0], True),
    "gender swap": ([-10.0, 7.0], False),
}

# UI direction name -> direction name sent to the server, for the directions
# whose two names differ.
DIRECTIONS_NAME_SWAP = {
    "smile": "fs_smiling",
    "glasses": "fs_glasses",
    "makeup": "fs_makeup",
    "gender swap": "gender",
}

# Slider value that corresponds to the bound of a direction's original range.
_REFERENCE_SLIDER_POWER = 15

# Placeholder bytes sent in the ``mask`` field when the user supplied no mask.
_NO_MASK_PLACEHOLDER = b"mask"

# Payloads the server puts in the image/mask field instead of image bytes.
_ALIGNER_ERROR = b"aligner error"
_MASKER_ERROR = b"masker face parser error"

# Image modes that are passed to the JPEG encoder without conversion.
_JPEG_READY_MODES = ("L", "RGB", "CMYK")

_INPUT_EXAMPLES = [
    ["images/scarlet.jpg"],
    ["images/gosling.jpg"],
    ["images/robert.png"],
    ["images/smith.jpg"],
    ["images/watson.jpeg"],
]

_MASK_EXAMPLES = [
    ["images/scarlet_mask.webp"],
    ["images/gosling_mask.webp"],
    ["images/robert_mask.webp"],
    ["images/smith_mask.webp"],
    ["images/watson_mask.webp"],
]

_PREDEFINED_DESCRIPTION = '''\
A branch of predefined editings gained from InterfaceGAN, Stylespace, GANSpace and StyleClip mappers. Look at the table below to see which direction is responsible for which editings.

**Editing power** -- the greater the absolute value of this parameter, the more the selected edit will appear. Better use values in the range 7 - 13, lower values may not give the desired edit, higher values -- on the contrary -- may apply edit too much and create artefacts.

**Positive effect** -- the effect applied to the image when positive editing power is used.

**Negative effect** -- the effect applied to the image when negative editing power is used. It is usually the opposite of the positive effect.
'''

_TEXT_PROMPT_DESCRIPTION = '''\
You can alse use editings from text prompts via **StyleClip Global Mapper** (https://arxiv.org/abs/2103.17249) or **DeltaEdit** (https://arxiv.org/abs/2303.06285).
You just need to choose:

**Method** -- method to use, StyleClip or DeltaEdit

**Editing power** -- the greater the absolute value of this parameter, the more the selected edit will appear.

**Neutral prompt** -- some neutral description of the original image (e.g. "a face").

**Target prompt** -- text that contains the desired edit (e.g. "a smilling face").

**Disentanglement** -- positive number, the less this attribute -- the more related attributes will also be changed (e.g. for grey hair editing, wrinkle, skin colour and glasses may also be edited)
'''

_MASK_DESCRIPTION = '''\
If some artefacts appear during editing (or some details disappear), you can specify an image mask to select which regions of the image should not be edited.

The mask must have a size of 1024 x 1024 and represent an inversion of the original image.
'''

_CITATION = '''\
To cite the paper by the authors
```
@InProceedings{Bobkov_2024_CVPR,
    author = {Bobkov, Denis and Titov, Vadim and Alanov, Aibek and Vetrov, Dmitry},
    title = {The Devil is in the Details: StyleFeatureEditor for Detail-Rich StyleGAN Inversion and High Quality Image Editing},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month = {June},
    year = {2024},
    pages = {9337-9346}
}
```
'''


def denormalize_power(direction_name, directon_power):
    """Rescale a slider power to the range configured for a direction.

    Args:
        direction_name: Direction name as used for the keys of
            ``PREDEFINED_EDITINGS_DATA``.
        directon_power: Power chosen by the user.

    Returns:
        ``directon_power`` unchanged when the direction has no configured
        range. Otherwise the power divided by 15 and multiplied by the
        magnitude of the upper bound (positive powers) or of the lower bound
        (zero and negative powers), negated when the direction is marked as
        reversed.
    """
    if direction_name not in PREDEFINED_EDITINGS_DATA:
        return directon_power

    (lower, upper), is_reversed = PREDEFINED_EDITINGS_DATA[direction_name]
    bound = upper if directon_power > 0 else lower
    rescaled = directon_power / _REFERENCE_SLIDER_POWER * abs(bound)
    return -rescaled if is_reversed else rescaled


def get_bytes(img):
    """Encode a PIL image as JPEG bytes.

    Args:
        img: A ``PIL.Image.Image`` or ``None``.

    Returns:
        The JPEG-encoded bytes, or ``None`` when ``img`` is ``None``.
    """
    if img is None:
        return None

    # Saving e.g. an RGBA or palette image as JPEG raises in Pillow, so such
    # images are converted to RGB first.
    if img.mode not in _JPEG_READY_MODES:
        img = img.convert("RGB")

    buffer = BytesIO()
    img.save(buffer, format="JPEG")
    return buffer.getvalue()


def bytes_to_image(image: bytes) -> Image.Image:
    """Decode encoded image bytes into a PIL image."""
    return Image.open(BytesIO(image))


def _edit_error(message):
    """Build the edit outputs that hide all images and show ``message``.

    The tuple order matches the ``outputs`` wired to the edit buttons:
    edited, inversion, error message, aligned, unaligned.
    """
    return (
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(value=message, visible=True),
        gr.update(visible=False),
        gr.update(visible=False),
    )


def _mask_error(message):
    """Build the mask outputs that hide the mask and show ``message``."""
    return gr.update(visible=False), gr.update(value=message, visible=True)


def edit_image(orig_image, edit_direction, edit_power, align, mask, progress=gr.Progress(track_tqdm=True)):
    """Send an edit request to the server and build the UI updates.

    Args:
        orig_image: PIL image to edit, or ``None`` when nothing was uploaded.
        edit_direction: UI name of a predefined direction, or an arbitrary
            direction string (e.g. one built by ``edit_image_clip``).
        edit_power: Editing power chosen by the user.
        align: Value forwarded in the ``align`` field of the request.
        mask: Optional PIL mask image.
        progress: Gradio progress tracker; not used directly in this function.

    Returns:
        A 5-tuple of ``gr.update`` objects for, in order: the edited image,
        the inversion image, the error message box, the aligned original
        image and the unaligned editing result.
    """
    if orig_image is None:
        return _edit_error("Need to upload an input image ❗")

    # The ranges in PREDEFINED_EDITINGS_DATA are keyed by the UI name, so the
    # power has to be rescaled *before* the name is translated to the
    # server-side one; otherwise renamed directions never match a key.
    edit_power = denormalize_power(edit_direction, edit_power)
    edit_direction = DIRECTIONS_NAME_SWAP.get(edit_direction, edit_direction)

    orig_image_bytes = get_bytes(orig_image)
    mask_bytes = get_bytes(mask)
    if mask_bytes is None:
        mask_bytes = _NO_MASK_PLACEHOLDER

    with grpc.insecure_channel(os.environ["SERVER"]) as channel:
        stub = SFEServiceStub(channel)
        output: SFEResponse = stub.edit(
            SFERequest(
                orig_image=orig_image_bytes,
                direction=edit_direction,
                power=edit_power,
                align=align,
                mask=mask_bytes,
                use_cache=True,
            )
        )

    if output.image == _ALIGNER_ERROR:
        return _edit_error(
            "Face aligner can not find face in your image 😢 Try to upload another one"
        )

    edited = gr.update(value=bytes_to_image(output.image), visible=True)
    inversion = gr.update(value=bytes_to_image(output.inv_image), visible=True)
    no_error = gr.update(visible=False)

    if not align:
        # Without alignment the aligned/unaligned panels are hidden.
        return edited, inversion, no_error, gr.update(visible=False), gr.update(visible=False)

    aligned = gr.update(value=bytes_to_image(output.aligned), visible=True)
    unaligned = gr.update(value=bytes_to_image(output.unaligned), visible=True)
    return edited, inversion, no_error, aligned, unaligned


def edit_image_clip(orig_image, neutral_prompt, target_prompt, disentanglement, edit_power, align, mask, edit_method,
                    progress=gr.Progress(track_tqdm=True)):
    """Run a text-prompt edit by encoding the prompts into a direction name.

    For ``"StyleClip"`` the direction is
    ``styleclip_global_<neutral>_<target>_<disentanglement>``. For any other
    method the power is divided by 10, the disentanglement by 3, and the
    direction is prefixed with ``deltaedit`` instead.

    Returns:
        The same 5-tuple of updates as ``edit_image``.
    """
    if edit_method == "StyleClip":
        prefix = "styleclip_global"
    else:
        prefix = "deltaedit"
        edit_power = edit_power / 10
        disentanglement = disentanglement / 3

    edit_direction = "_".join([prefix, neutral_prompt, target_prompt, str(disentanglement)])
    return edit_image(orig_image, edit_direction, edit_power, align, mask, progress=None)


def get_mask(input_image, align, mask_trashhold, progress=gr.Progress(track_tqdm=True)):
    """Request a mask for ``input_image`` from the server.

    Args:
        input_image: PIL image, or ``None`` when nothing was uploaded.
        align: Value forwarded in the ``align`` field of the request.
        mask_trashhold: Threshold forwarded in the ``trashold`` request field.
        progress: Gradio progress tracker; not used directly in this function.

    Returns:
        A 2-tuple of ``gr.update`` objects for the generated mask image and
        the mask error message box.
    """
    if input_image is None:
        return _mask_error("Need to upload an input image ❗")

    input_image_bytes = get_bytes(input_image)

    with grpc.insecure_channel(os.environ["SERVER"]) as channel:
        stub = SFEServiceStub(channel)
        output: SFEResponseMask = stub.generate_mask(
            SFERequestMask(
                orig_image=input_image_bytes,
                trashold=mask_trashhold,
                align=align,
                use_cache=True,
            )
        )

    if output.mask == _ALIGNER_ERROR:
        return _mask_error(
            "Face aligner can not find face in your image 😢 Try to upload another one"
        )
    if output.mask == _MASKER_ERROR:
        return _mask_error(
            "Masker's face detector can't find face in your image 😢 Try to upload another one"
        )

    return gr.update(value=bytes_to_image(output.mask), visible=True), gr.update(visible=False)


def get_demo():
    """Build the Gradio ``Blocks`` application and wire up its callbacks.

    Reads ``editings_table.csv`` from the working directory to populate the
    table of predefined editings.

    Returns:
        The assembled ``gr.Blocks`` instance (not yet launched).
    """
    editings_table = pd.read_csv("editings_table.csv")
    editings_table = editings_table.style.set_properties(**{"text-align": "center"})
    editings_table = editings_table.set_table_styles(
        [dict(selector="th", props=[("text-align", "center")])]
    )

    # NOTE(review): the nesting of the layout containers below should be
    # confirmed against the rendered UI.
    with gr.Blocks() as demo:
        gr.Markdown("## StyleFeatureEditor")
        # NOTE(review): this header looks like it is missing its link/badge
        # markup -- confirm the intended content.
        gr.Markdown(
            "\n"
            "Official Gradio demo for StyleFeatureEditor:"
            "\n"
        )

        with gr.Row():
            with gr.Column():
                with gr.Accordion("Input Image", open=True):
                    input_image = gr.Image(label="Input image you want to edit", type="pil", height=300)
                    align = gr.Checkbox(
                        label="Align (crop and resize) the input image. For SFE to work well, "
                              "it is necessary to align the input if it is not.",
                        value=True,
                    )

                with gr.Accordion("Predefined Editings", open=True):
                    with gr.Accordion("Description", open=False):
                        gr.Markdown(_PREDEFINED_DESCRIPTION)
                        gr.Dataframe(
                            value=editings_table,
                            datatype=["markdown", "markdown", "markdown", "markdown"],
                            interactive=False,
                            wrap=True,
                            column_widths=["25px", "25px", "25px", "25px"],
                            height=300,
                        )
                    with gr.Row():
                        predef_editing_direction = gr.Dropdown(
                            list(PREDEFINED_EDITINGS_DATA.keys()),
                            label="Editing direction",
                            value="smile",
                        )
                        predef_editing_power = gr.Slider(-20, 20, value=7, step=0.1, label="Editing power")
                    btn_predef = gr.Button("Edit image")

                with gr.Accordion("Text Prompt Editings", open=False):
                    with gr.Accordion("Description", open=False):
                        gr.Markdown(_TEXT_PROMPT_DESCRIPTION)
                    edit_method = gr.Dropdown(["StyleClip", "DeltaEdit"], label="Editing method", value="StyleClip")
                    neutral_prompt = gr.Textbox(value="face with hair", label="Neutreal prompt (e.g. 'a face')")
                    target_prompt = gr.Textbox(
                        value="face with fire hair",
                        label="Target prompt (e.g. 'a smilling face')",
                    )
                    styleclip_editing_power = gr.Slider(-50, 50, value=10, step=1, label="Editing power")
                    disentanglement = gr.Slider(0, 1, value=0.1, step=0.01, label="Disentanglement")
                    btn_clip = gr.Button("Edit image")

                with gr.Accordion("Mask settings (optional)", open=False):
                    gr.Markdown(_MASK_DESCRIPTION)
                    mask = gr.Image(label="Upload mask for editing", type="pil", height=350)

                    with gr.Accordion("Mask generating", open=False):
                        gr.Markdown(
                            "Here you can generate mask that separates face (with hair) from the background."
                        )
                        with gr.Row():
                            input_mask = gr.Image(label="Input image for mask generating", type="pil", height=240)
                            output_mask = gr.Image(label="Generated mask", height=240)
                        error_message_mask = gr.Textbox(
                            label="⚠️ Error ⚠️", visible=False, elem_classes="error-message"
                        )
                        align_mask = gr.Checkbox(
                            label="To align (crop and resize image) or not. "
                                  "Only uncheck this box if the original image has already been aligned.",
                            value=True,
                        )
                        mask_trashhold = gr.Slider(
                            0, 1, value=0.9, step=0.001, label="Mask trashold",
                            info="The more this parameter, the more is face part, and the less is background part.",
                        )
                        btn_mask = gr.Button("Generate mask")

            with gr.Column():
                with gr.Row():
                    output_align = gr.Image(label="Alignet original image", visible=True)
                    output_unalign = gr.Image(label="Unalinget editing result", visible=True)
                with gr.Row():
                    output_inv = gr.Image(label="Inversion result", visible=True)
                    output_edit = gr.Image(label="Editing result", visible=True)
                error_message = gr.Textbox(label="⚠️ Error ⚠️", visible=False, elem_classes="error-message")
                gr.Markdown("If artefacts appear during editing -- try lowering the editing power or using a mask.")

        gr.Examples(
            label="Input Examples for editing",
            examples=_INPUT_EXAMPLES,
            inputs=[input_image],
            examples_per_page=5,
        )
        gr.Examples(
            label="Mask Examples for editing",
            examples=_MASK_EXAMPLES,
            inputs=[mask],
        )
        gr.Examples(
            label="Input Examples for Mask generation",
            examples=_INPUT_EXAMPLES,
            inputs=[input_mask],
        )

        # Both edit buttons feed the same five output components, in the
        # order produced by ``edit_image``.
        edit_outputs = [output_edit, output_inv, error_message, output_align, output_unalign]

        btn_predef.click(
            fn=edit_image,
            inputs=[input_image, predef_editing_direction, predef_editing_power, align, mask],
            outputs=edit_outputs,
        )
        btn_clip.click(
            fn=edit_image_clip,
            inputs=[
                input_image,
                neutral_prompt,
                target_prompt,
                disentanglement,
                styleclip_editing_power,
                align,
                mask,
                edit_method,
            ],
            outputs=edit_outputs,
        )
        btn_mask.click(
            fn=get_mask,
            inputs=[input_mask, align_mask, mask_trashhold],
            outputs=[output_mask, error_message_mask],
        )

        gr.Markdown(_CITATION)

    return demo


if __name__ == "__main__":
    demo = get_demo()
    demo.launch(server_name="0.0.0.0", server_port=7860)