from scipy.spatial import ConvexHull
import torch
import torch.nn.functional as F
import numpy as np
from tqdm import tqdm

def normalize_kp(kp_source, kp_driving, kp_driving_initial, adapt_movement_scale=False,
                 use_relative_movement=False, use_relative_jacobian=False):
    if adapt_movement_scale:
        # Scale the driving motion by the ratio of the convex-hull areas of
        # the source and initial driving keypoints.
        source_area = ConvexHull(kp_source['value'][0].data.cpu().numpy()).volume
        driving_area = ConvexHull(kp_driving_initial['value'][0].data.cpu().numpy()).volume
        adapt_movement_scale = np.sqrt(source_area) / np.sqrt(driving_area)
    else:
        adapt_movement_scale = 1

    # Shallow copy so untouched entries of kp_driving pass through unchanged.
    kp_new = {k: v for k, v in kp_driving.items()}

    if use_relative_movement:
        # Re-express the driving motion relative to its initial frame and
        # apply it on top of the source keypoints.
        kp_value_diff = (kp_driving['value'] - kp_driving_initial['value'])
        kp_value_diff *= adapt_movement_scale
        kp_new['value'] = kp_value_diff + kp_source['value']

        if use_relative_jacobian:
            jacobian_diff = torch.matmul(kp_driving['jacobian'], torch.inverse(kp_driving_initial['jacobian']))
            kp_new['jacobian'] = torch.matmul(jacobian_diff, kp_source['jacobian'])

    return kp_new

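# Usage sketch (hypothetical values; FOMM-style relative animation). The dicts
# carry a 'value' tensor of shape (bs, num_kp, dim) and, when
# use_relative_jacobian=True, a matching 'jacobian' tensor:
#   kp_norm = normalize_kp(kp_source, kp_driving, kp_driving_initial,
#                          adapt_movement_scale=True, use_relative_movement=True)
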
def headpose_pred_to_degree(pred):
    # `pred` holds logits over 66 bins of 3 degrees each; the softmax-expected
    # bin index is mapped to a continuous angle in degrees (bin i -> 3i - 99).
    device = pred.device
    idx_tensor = torch.arange(66, dtype=pred.dtype, device=device)
    pred = F.softmax(pred, dim=1)
    degree = torch.sum(pred * idx_tensor, 1) * 3 - 99
    return degree

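# Worked example (sketch): a logit peak at bin 33 decodes to about
# 33 * 3 - 99 = 0 degrees.
#   logits = torch.full((1, 66), -1e4)
#   logits[0, 33] = 0.0
#   headpose_pred_to_degree(logits)  # ~tensor([0.])
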
def get_rotation_matrix(yaw, pitch, roll):
    # Convert degrees to radians.
    yaw = yaw / 180 * np.pi
    pitch = pitch / 180 * np.pi
    roll = roll / 180 * np.pi

    roll = roll.unsqueeze(1)
    pitch = pitch.unsqueeze(1)
    yaw = yaw.unsqueeze(1)

    # Rotation about the x-axis (pitch).
    pitch_mat = torch.cat([torch.ones_like(pitch), torch.zeros_like(pitch), torch.zeros_like(pitch),
                           torch.zeros_like(pitch), torch.cos(pitch), -torch.sin(pitch),
                           torch.zeros_like(pitch), torch.sin(pitch), torch.cos(pitch)], dim=1)
    pitch_mat = pitch_mat.view(pitch_mat.shape[0], 3, 3)

    # Rotation about the y-axis (yaw).
    yaw_mat = torch.cat([torch.cos(yaw), torch.zeros_like(yaw), torch.sin(yaw),
                         torch.zeros_like(yaw), torch.ones_like(yaw), torch.zeros_like(yaw),
                         -torch.sin(yaw), torch.zeros_like(yaw), torch.cos(yaw)], dim=1)
    yaw_mat = yaw_mat.view(yaw_mat.shape[0], 3, 3)

    # Rotation about the z-axis (roll).
    roll_mat = torch.cat([torch.cos(roll), -torch.sin(roll), torch.zeros_like(roll),
                          torch.sin(roll), torch.cos(roll), torch.zeros_like(roll),
                          torch.zeros_like(roll), torch.zeros_like(roll), torch.ones_like(roll)], dim=1)
    roll_mat = roll_mat.view(roll_mat.shape[0], 3, 3)

    # Compose as R = R_pitch @ R_yaw @ R_roll.
    rot_mat = torch.einsum('bij,bjk,bkm->bim', pitch_mat, yaw_mat, roll_mat)

    return rot_mat

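# Sanity sketch: zero angles should give (numerically) the identity rotation.
#   zeros = torch.zeros(2)
#   get_rotation_matrix(zeros, zeros, zeros)  # ~eye(3) per batch item, shape (2, 3, 3)
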
def keypoint_transformation(kp_canonical, he, wo_exp=False):
    kp = kp_canonical['value']    # (bs, num_kp, 3)
    yaw, pitch, roll = he['yaw'], he['pitch'], he['roll']
    yaw = headpose_pred_to_degree(yaw)
    pitch = headpose_pred_to_degree(pitch)
    roll = headpose_pred_to_degree(roll)

    # User-supplied pose sequences override the predicted angles.
    if 'yaw_in' in he:
        yaw = he['yaw_in']
    if 'pitch_in' in he:
        pitch = he['pitch_in']
    if 'roll_in' in he:
        roll = he['roll_in']

    rot_mat = get_rotation_matrix(yaw, pitch, roll)    # (bs, 3, 3)

    t, exp = he['t'], he['exp']
    if wo_exp:
        exp = exp * 0

    # Keypoint rotation.
    kp_rotated = torch.einsum('bmp,bkp->bkm', rot_mat, kp)

    # Keypoint translation; the x and z components are zeroed out.
    t[:, 0] = t[:, 0] * 0
    t[:, 2] = t[:, 2] * 0
    t = t.unsqueeze(1).repeat(1, kp.shape[1], 1)
    kp_t = kp_rotated + t

    # Add the expression deviation.
    exp = exp.view(exp.shape[0], -1, 3)
    kp_transformed = kp_t + exp

    return {'value': kp_transformed}

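# Input contract (sketch): kp_canonical = {'value': (bs, num_kp, 3)}; `he` maps
# 'yaw'/'pitch'/'roll' to 66-bin logits, 't' to (bs, 3) translations, and 'exp'
# to (bs, num_kp * 3) expression offsets. A runnable shape check sits in the
# __main__ block at the bottom of this file.
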
def make_animation(source_image, source_semantics, target_semantics,
                   generator, kp_detector, he_estimator, mapping,
                   yaw_c_seq=None, pitch_c_seq=None, roll_c_seq=None,
                   use_exp=True, use_half=False):
    # Note: `he_estimator`, `use_exp` and `use_half` are kept for interface
    # compatibility; only the commented-out refinement pass below would use
    # `he_estimator`.
    with torch.no_grad():
        predictions = []

        kp_canonical = kp_detector(source_image)
        he_source = mapping(source_semantics)
        kp_source = keypoint_transformation(kp_canonical, he_source)

        for frame_idx in tqdm(range(target_semantics.shape[1]), 'Face Renderer:'):
            target_semantics_frame = target_semantics[:, frame_idx]
            he_driving = mapping(target_semantics_frame)
            if yaw_c_seq is not None:
                he_driving['yaw_in'] = yaw_c_seq[:, frame_idx]
            if pitch_c_seq is not None:
                he_driving['pitch_in'] = pitch_c_seq[:, frame_idx]
            if roll_c_seq is not None:
                he_driving['roll_in'] = roll_c_seq[:, frame_idx]

            kp_driving = keypoint_transformation(kp_canonical, he_driving)

            kp_norm = kp_driving
            out = generator(source_image, kp_source=kp_source, kp_driving=kp_norm)
            '''
            source_image_new = out['prediction'].squeeze(1)
            kp_canonical_new = kp_detector(source_image_new)
            he_source_new = he_estimator(source_image_new)
            kp_source_new = keypoint_transformation(kp_canonical_new, he_source_new, wo_exp=True)
            kp_driving_new = keypoint_transformation(kp_canonical_new, he_driving, wo_exp=True)
            out = generator(source_image_new, kp_source=kp_source_new, kp_driving=kp_driving_new)
            '''
            predictions.append(out['prediction'])
        predictions_ts = torch.stack(predictions, dim=1)
    return predictions_ts

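# Call sketch (the model objects are assumed to follow SadTalker's facerender
# interfaces; he_estimator may be None since only the commented-out refinement
# pass would use it):
#   video = make_animation(source_image, source_semantics, target_semantics,
#                          generator, kp_detector, None, mapping)
#   # video: (bs, num_frames, C, H, W)
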
class AnimateModel(torch.nn.Module):
    """
    Merge all generator-related updates into a single model for better multi-GPU usage.
    """

    def __init__(self, generator, kp_extractor, mapping):
        super(AnimateModel, self).__init__()
        self.kp_extractor = kp_extractor
        self.generator = generator
        self.mapping = mapping

        self.kp_extractor.eval()
        self.generator.eval()
        self.mapping.eval()

    def forward(self, x):
        source_image = x['source_image']
        source_semantics = x['source_semantics']
        target_semantics = x['target_semantics']
        yaw_c_seq = x['yaw_c_seq']
        pitch_c_seq = x['pitch_c_seq']
        roll_c_seq = x['roll_c_seq']

        # `he_estimator` is only needed by the commented-out refinement pass in
        # make_animation, so None is passed here; keyword arguments avoid
        # misbinding `mapping` to the `he_estimator` parameter.
        predictions_video = make_animation(source_image, source_semantics, target_semantics,
                                           self.generator, self.kp_extractor,
                                           he_estimator=None, mapping=self.mapping,
                                           use_exp=True,
                                           yaw_c_seq=yaw_c_seq, pitch_c_seq=pitch_c_seq,
                                           roll_c_seq=roll_c_seq)

        return predictions_video
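

# Minimal smoke test: an illustrative sketch, not part of the SadTalker
# pipeline. The 66 pose bins match headpose_pred_to_degree above; the keypoint
# count of 15 is an arbitrary assumption for the example.
if __name__ == '__main__':
    bs, num_kp = 2, 15
    kp_canonical = {'value': torch.randn(bs, num_kp, 3)}
    he = {'yaw': torch.randn(bs, 66), 'pitch': torch.randn(bs, 66),
          'roll': torch.randn(bs, 66), 't': torch.randn(bs, 3),
          'exp': torch.randn(bs, num_kp * 3)}
    out = keypoint_transformation(kp_canonical, he)
    assert out['value'].shape == (bs, num_kp, 3)
    print('keypoint_transformation output shape:', tuple(out['value'].shape))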