# Spaces: Build error
import os
import pickle
import shutil
import sys

import gradio as gr
import torch
from easydict import EasyDict as edict
from huggingface_hub import hf_hub_download

# The ROME and DECA checkouts must be on sys.path before the rome imports below.
sys.path.append("./rome/")
sys.path.append('./DECA')
from rome.infer import Infer
from rome.src.utils.processing import process_black_shape, tensor2image
# Pretrained checkpoints, fetched from the 'Pie31415/rome' Hub repository.
# The returned values are used below as file paths in `args`.
default_modnet_path = hf_hub_download(
    repo_id='Pie31415/rome',
    filename='modnet_photographic_portrait_matting.ckpt',
)
default_model_path = hf_hub_download(
    repo_id='Pie31415/rome',
    filename='rome.pth',
)
# Configuration object handed to `Infer`. Keys are grouped by prefix purely
# for readability; names, values and insertion order are unchanged.
args = edict(dict(
    # --- output, checkpoints and input alignment ---
    save_dir=".",
    save_render=True,
    model_checkpoint=default_model_path,
    modnet_path=default_modnet_path,
    random_seed=0,
    debug=False,
    verbose=False,
    model_image_size=256,
    align_source=True,
    align_target=False,
    align_scale=1.25,
    use_mesh_deformations=False,
    subdivide_mesh=False,
    # --- renderer_* ---
    renderer_sigma=1e-08,
    renderer_zfar=100.0,
    renderer_type="soft_mesh",
    renderer_texture_type="texture_uv",
    renderer_normalized_alphas=False,
    # --- data locations ---
    deca_path="DECA",
    rome_data_dir="rome/data",
    # --- autoenc_* ---
    autoenc_cat_alphas=False,
    autoenc_align_inputs=False,
    autoenc_use_warp=False,
    autoenc_num_channels=64,
    autoenc_max_channels=512,
    autoenc_num_groups=4,
    autoenc_num_bottleneck_groups=0,
    autoenc_num_blocks=2,
    autoenc_num_layers=4,
    autoenc_block_type="bottleneck",
    neural_texture_channels=8,
    num_harmonic_encoding_funcs=6,
    # --- unet_* ---
    unet_num_channels=64,
    unet_max_channels=512,
    unet_num_groups=4,
    unet_num_blocks=1,
    unet_num_layers=2,
    unet_block_type="conv",
    unet_skip_connection_type="cat",
    unet_use_normals_cond=True,
    unet_use_vertex_cond=False,
    unet_use_uvs_cond=False,
    unet_pred_mask=False,
    use_separate_seg_unet=True,
    # --- layer types ---
    norm_layer_type="gn",
    activation_type="relu",
    conv_layer_type="ws_conv",
    deform_norm_layer_type="gn",
    deform_activation_type="relu",
    deform_conv_layer_type="ws_conv",
    # --- segmentation and deformation options ---
    unet_seg_weight=0.0,
    unet_seg_type="bce_with_logits",
    deform_face_tightness=0.0001,
    use_whole_segmentation=False,
    mask_hair_for_neck=False,
    use_hair_from_avatar=False,
    use_scalp_deforms=True,
    use_neck_deforms=True,
    use_basis_deformer=False,
    use_unet_deformer=True,
    pretrained_encoder_basis_path="",
    pretrained_vertex_basis_path="",
    num_basis=50,
    basis_init="pca",
    num_vertex=5023,
    train_basis=True,
    path_to_deca="DECA",
    path_to_linear_hair_model="data/linear_hair.pth",  # N/A
    path_to_mobile_model="data/disp_model.pth",  # N/A
    n_scalp=60,
    use_distill=False,
    use_mobile_version=False,
    deformer_path="data/rome.pth",
    output_unet_deformer_feats=32,
    use_deca_details=False,
    use_flametex=False,
    upsample_type="nearest",
    num_frequencies=6,
    deform_face_scale_coef=0.0,
    # --- runtime ---
    device="cpu",
))
# Download the FLAME and DECA pretrained files and stage them under
# ./DECA/data (presumably where the DECA code expects them -- confirm).
generic_model_path = hf_hub_download('Pie31415/rome', 'generic_model.pkl')
deca_model_path = hf_hub_download('Pie31415/rome', 'deca_model.tar')

deca_data_dir = './DECA/data'
# Create the target directory up front so the writes below cannot fail with
# FileNotFoundError when it does not exist yet.
os.makedirs(deca_data_dir, exist_ok=True)

# Re-serialise the pickle: it is read with encoding='latin1' and written back
# with the running interpreter's default pickle protocol.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file.
# This is only acceptable because the file comes from a repository we trust;
# do not point this at user-supplied data.
with open(generic_model_path, 'rb') as src:
    generic_model = pickle.load(src, encoding='latin1')
with open(os.path.join(deca_data_dir, 'generic_model.pkl'), 'wb') as dst:
    pickle.dump(generic_model, dst)

# Plain byte-for-byte copy. shutil.copyfile streams in fixed-size chunks,
# unlike iterating a binary file "by line", where chunk size depends on where
# newline bytes happen to fall. It also avoids binding a module-level name
# that shadows the builtin `input`.
shutil.copyfile(deca_model_path, os.path.join(deca_data_dir, 'deca_model.tar'))

# Build the ROME inference object once at start-up; the request handlers
# reuse this single instance.
infer = Infer(args)
def image_inference(
    source_img=None,
    driver_img=None,
):
    """Run ROME on one source/driver image pair and return a preview strip.

    Args:
        source_img: Source image as delivered by the ``gr.Image(type="pil")``
            input component; ``None`` when nothing was uploaded.
        driver_img: Driver image, delivered the same way.

    Returns:
        The output of ``tensor2image`` for the four result tensors
        concatenated along ``dim=2``, with its last axis reversed.

    Raises:
        gr.Error: If either image is missing, so the UI shows a readable
            message instead of a failure from inside the model code.
    """
    # The parameters are intentionally unannotated: the former
    # ``gr.inputs.Image`` annotations were evaluated at definition time, and
    # that namespace is deprecated and absent from newer Gradio releases. It
    # also named a component class rather than the type of the value passed in.
    if source_img is None or driver_img is None:
        raise gr.Error("Please provide both a source image and a driver image.")

    out = infer.evaluate(source_img, driver_img, crop_center=False)
    data_dict = out['source_information']['data_dict']

    # Panels in display order. Note that 'render_masked' is used whole while
    # the other three entries take element [0].
    panels = [
        data_dict['source_img'][0].cpu(),
        data_dict['target_img'][0].cpu(),
        out['render_masked'].cpu(),
        out['pred_target_shape_img'][0].cpu(),
    ]
    # dim=2 is presumably the width axis of (C, H, W) tensors, giving a
    # side-by-side strip -- TODO confirm against tensor2image.
    res = tensor2image(torch.cat(panels, dim=2))

    # Reverse the last axis (presumably BGR -> RGB channel order -- confirm).
    return res[..., ::-1]
def video_inference():
    """Placeholder for video inference; does nothing and returns None."""
    return None
# NOTE(review): the indentation of this section was lost in the copy this was
# rebuilt from; the nesting below (rows inside tabs, examples and event wiring
# inside the Blocks context, launch outside it) is a reconstruction -- confirm.

# Static page copy, kept apart from the layout code.
_TITLE_MD = "# **<p align='center'>ROME: Realistic one-shot mesh-based head avatars</p>**"
_INTRO_MD = """
<p style='text-align: center'>
Create a personal avatar from just a single image using ROME.
<br> <a href='https://arxiv.org/abs/2206.08343' target='_blank'>Paper</a> | <a href='https://samsunglabs.github.io/rome' target='_blank'>Project Page</a> | <a href='https://github.com/SamsungLabs/rome' target='_blank'>Github</a>
</p>
"""

# [source image, driver image] pairs offered as ready-made examples.
_EXAMPLE_PAIRS = [
    ["./examples/lincoln.jpg", "./examples/taras2.jpg"],
    ["./examples/lincoln.jpg", "./examples/taras1.jpg"],
]

with gr.Blocks() as demo:
    gr.Markdown(_TITLE_MD)
    gr.Markdown(_INTRO_MD)

    with gr.Tab("Image Inference"):
        with gr.Row():
            source_img = gr.Image(type="pil", label="source image", show_label=True)
            driver_img = gr.Image(type="pil", label="driver image", show_label=True)
        image_output = gr.Image()
        image_button = gr.Button("Predict")

    with gr.Tab("Video Inference"):
        with gr.Row():
            source_video = gr.Video(label="source video")
            driver_image_for_vid = gr.Image(type="pil", label="driver image", show_label=True)
        video_output = gr.Image()
        video_button = gr.Button("Predict")

    # Examples feed the image tab's inputs and run image_inference, with
    # results cached (cache_examples=True).
    gr.Examples(
        examples=_EXAMPLE_PAIRS,
        inputs=[source_img, driver_img],
        outputs=[image_output],
        fn=image_inference,
        cache_examples=True,
    )

    image_button.click(
        fn=image_inference,
        inputs=[source_img, driver_img],
        outputs=image_output,
    )
    # NOTE(review): no handler is attached here (fn is None), so the video
    # tab's button is not wired to video_inference.
    video_button.click(
        fn=None,
        inputs=[source_video, driver_image_for_vid],
        outputs=video_output,
    )

demo.launch()