# Hugging Face Space: NoCrypt/pixelization (status: Running)
# File size: 11,985 bytes

from .basic_layer import *
import torchvision.models as models
import os



class AliasNet(nn.Module):
    """Encoder/decoder pair assembled from the ``Alias*`` building blocks.

    The input is run through an ``AliasRGBEncoder`` and the encoder output is
    then run through an ``AliasRGBDecoder``.

    Args:
        input_dim: Channel count passed to the encoder's first conv block.
        output_dim: Channel count passed to the decoder's last conv block.
        dim: Base width passed to the encoder.
        n_downsample: Number of downsampling blocks in the encoder; the same
            value is handed to the decoder as its ``n_upsample`` argument.
        n_res: Forwarded to the residual stacks of encoder and decoder.
        activ: Activation name forwarded to every block.
        pad_type: Padding name forwarded to every block.
    """

    def __init__(self, input_dim, output_dim, dim, n_downsample, n_res, activ='relu', pad_type='reflect'):
        super().__init__()
        encoder = AliasRGBEncoder(input_dim, dim, n_downsample, n_res, "in", activ, pad_type=pad_type)
        self.RGBEnc = encoder
        # The decoder starts from whatever width the encoder ended up with.
        self.RGBDec = AliasRGBDecoder(
            encoder.output_dim,
            output_dim,
            n_downsample,
            n_res,
            res_norm='in',
            activ=activ,
            pad_type=pad_type,
        )

    def forward(self, x):
        """Return ``RGBDec(RGBEnc(x))``."""
        return self.RGBDec(self.RGBEnc(x))


class AliasRGBEncoder(nn.Module):
    """Sequential encoder: one input conv block, downsampling blocks, residual stack.

    Args:
        input_dim: Channel count passed to the first ``AliasConvBlock``.
        dim: Width of the first conv block; doubled after every downsampling block.
        n_downsample: Number of downsampling ``AliasConvBlock`` stages.
        n_res: Forwarded to ``AliasResBlocks``.
        norm: Normalisation name forwarded to every block.
        activ: Activation name forwarded to every block.
        pad_type: Padding name forwarded to every block.

    Attributes set:
        model: ``nn.Sequential`` holding all stages in order.
        output_dim: Final width, i.e. ``dim * 2 ** n_downsample``.
    """

    def __init__(self, input_dim, dim, n_downsample, n_res, norm, activ, pad_type):
        super().__init__()
        shared = dict(norm=norm, activation=activ, pad_type=pad_type)

        width = dim
        stages = [AliasConvBlock(input_dim, width, 7, 1, 3, **shared)]
        # Each downsampling block doubles the width.
        for _ in range(n_downsample):
            stages.append(AliasConvBlock(width, 2 * width, 4, 2, 1, **shared))
            width *= 2
        # Residual stack runs at the final width.
        stages.append(AliasResBlocks(n_res, width, **shared))

        self.model = nn.Sequential(*stages)
        self.output_dim = width

    def forward(self, x):
        """Run ``x`` through the sequential stack."""
        encoded = self.model(x)
        return encoded


class AliasRGBDecoder(nn.Module):
    """Decoder: residual stack, two (upsample, conv) stages, then an output conv.

    Args:
        dim: Width of the incoming feature map; halved after each of the two
            upsampling stages.
        output_dim: Channel count passed to the last ``AliasConvBlock``.
        n_upsample: Accepted for signature compatibility but not read; exactly
            two upsampling stages are always built.
        n_res: Forwarded to ``AliasResBlocks``.
        res_norm: Normalisation name forwarded to ``AliasResBlocks``.
        activ: Activation name for the residual stack and the two upsampling convs.
        pad_type: Padding name forwarded to every block.
    """

    def __init__(self, dim, output_dim, n_upsample, n_res, res_norm, activ='relu', pad_type='zero'):
        super().__init__()
        half = dim // 2
        quarter = half // 2

        # Sub-modules are created in the same order and under the same
        # attribute names as before, so state_dict keys are unaffected.
        self.Res_Blocks = AliasResBlocks(n_res, dim, res_norm, activ, pad_type=pad_type)

        self.upsample_block1 = nn.Upsample(scale_factor=2, mode='nearest')
        self.conv_1 = AliasConvBlock(dim, half, 5, 1, 2, norm='ln', activation=activ, pad_type=pad_type)

        self.upsample_block2 = nn.Upsample(scale_factor=2, mode='nearest')
        self.conv_2 = AliasConvBlock(half, quarter, 5, 1, 2, norm='ln', activation=activ, pad_type=pad_type)

        # Output block: no normalisation, tanh activation.
        self.conv_3 = AliasConvBlock(quarter, output_dim, 7, 1, 3, norm='none', activation='tanh', pad_type=pad_type)

    def forward(self, x):
        """Apply every stage in order and return the result."""
        pipeline = (
            self.Res_Blocks,
            self.upsample_block1,
            self.conv_1,
            self.upsample_block2,
            self.conv_2,
            self.conv_3,
        )
        for stage in pipeline:
            x = stage(x)
        return x


class C2PGen(nn.Module):
    """Generator that decodes clip-art features under a code taken from a pixel-art image.

    Sub-modules:
        PBEnc: ``PixelBlockEncoder`` applied to the pixel-art input.
        RGBEnc: ``RGBEncoder`` applied to the clip-art input.
        RGBDec: ``RGBDecoder`` that consumes the encoder features plus the MLP output.
        MLP: maps the ``PBEnc`` code to the parameters handed to ``RGBDec``.

    Args:
        input_dim: Channel count passed to both encoders.
        output_dim: Channel count passed to the decoder's last block.
        dim: Base width passed to both encoders.
        n_downsample: Downsampling count for ``RGBEnc`` (also handed to ``RGBDec``).
        n_res: Forwarded to ``RGBEnc`` and ``RGBDec``.
        style_dim: Passed to ``PBEnc`` and used as the MLP's first argument.
        mlp_dim: Third argument of the MLP.
        activ: Activation name forwarded to all sub-modules.
        pad_type: Padding name forwarded to the encoders and decoder.
    """

    _ADAIN_CLASS_NAME = "AdaptiveInstanceNorm2d"

    def __init__(self, input_dim, output_dim, dim, n_downsample, n_res, style_dim, mlp_dim, activ='relu', pad_type='reflect'):
        super().__init__()
        self.PBEnc = PixelBlockEncoder(input_dim, dim, style_dim, norm='none', activ=activ, pad_type=pad_type)
        self.RGBEnc = RGBEncoder(input_dim, dim, n_downsample, n_res, "in", activ, pad_type=pad_type)
        self.RGBDec = RGBDecoder(
            self.RGBEnc.output_dim,
            output_dim,
            n_downsample,
            n_res,
            res_norm='adain',
            activ=activ,
            pad_type=pad_type,
        )
        self.MLP = MLP(style_dim, 2048, mlp_dim, 3, norm='none', activ=activ)

    def forward(self, clipart, pixelart, s=1):
        """Return the decoded image for ``clipart`` using the code of ``pixelart``.

        ``s`` scales the MLP output before it reaches the decoder. Only the
        image is returned; the scaled parameters computed by ``fuse`` are
        dropped (return them as well when visualising the cell-size code).
        """
        content = self.RGBEnc(clipart)
        style_code = self.PBEnc(pixelart)
        image, _cellcode = self.fuse(content, style_code, s)
        return image

    def fuse(self, content, style_code, s=1):
        """Decode ``content`` with ``MLP(style_code) * s``.

        Returns:
            ``(images, adain_params)`` - the decoder output and the scaled
            MLP output that was fed to it.
        """
        scaled_params = self.MLP(style_code) * s
        decoded = self.RGBDec(content, scaled_params)
        return decoded, scaled_params

    def assign_adain_params(self, adain_params, model):
        """Write slices of ``adain_params`` into the AdaIN layers of ``model``.

        Layers are matched by class name. For each match the leading
        ``num_features`` columns become ``bias`` and the next ``num_features``
        columns become ``weight`` (both flattened); the consumed columns are
        then dropped if any columns remain beyond them.
        """
        remaining = adain_params
        for layer in model.modules():
            if type(layer).__name__ != self._ADAIN_CLASS_NAME:
                continue
            n = layer.num_features
            layer.bias = remaining[:, :n].contiguous().view(-1)
            layer.weight = remaining[:, n:2 * n].contiguous().view(-1)
            if remaining.size(1) > 2 * n:
                remaining = remaining[:, 2 * n:]

    def get_num_adain_params(self, model):
        """Return ``2 * num_features`` summed over the AdaIN layers of ``model``."""
        return sum(
            2 * layer.num_features
            for layer in model.modules()
            if type(layer).__name__ == self._ADAIN_CLASS_NAME
        )


class PixelBlockEncoder(nn.Module):
    """Encode an image into a code, mixing learned conv features with frozen VGG19 features.

    A VGG19 whose last classifier layer is replaced by a 7-way linear head is
    loaded from a checkpoint; only its ``features`` part is kept and frozen.
    Four ``ConvBlock`` stages process the input, and after each stage the
    matching VGG activation is concatenated on the channel axis. A global
    average pool followed by a 1x1 convolution produces the final code.

    The checkpoint path is taken from the ``PIX_MODEL`` environment variable
    when it is set and non-empty, otherwise ``./pixelart_vgg19.pth`` is used.

    Args:
        input_dim: Channel count passed to the first ``ConvBlock``.
        dim: Width of the first ``ConvBlock``; doubled after every stage.
        style_dim: Output channels of the final 1x1 convolution.
        norm: Normalisation name forwarded to every ``ConvBlock``.
        activ: Activation name forwarded to every ``ConvBlock``.
        pad_type: Padding name forwarded to every ``ConvBlock``.
    """

    @staticmethod
    def _weights_path():
        """Return the VGG19 checkpoint path (``$PIX_MODEL`` or the default).

        ``os.environ.get`` is used so that an unset variable falls back to the
        default instead of raising ``KeyError``; an empty value also falls back.
        """
        return os.environ.get('PIX_MODEL') or './pixelart_vgg19.pth'

    def __init__(self, input_dim, dim, style_dim, norm, activ, pad_type):
        super(PixelBlockEncoder, self).__init__()
        vgg19 = models.vgg.vgg19()
        # Replace the final classifier layer with a 7-way head before loading
        # (presumably so the checkpoint's classifier shapes match - confirm).
        vgg19.classifier._modules['6'] = nn.Linear(4096, 7, bias=True)
        state_dict = torch.load(self._weights_path(), map_location=torch.device('cpu'), weights_only=True)
        vgg19.load_state_dict(state_dict)
        # Only the convolutional feature extractor is kept, and it is frozen.
        self.vgg = vgg19.features
        for p in self.vgg.parameters():
            p.requires_grad = False

        # `dim` is doubled after each stage because the forward pass
        # concatenates a VGG activation onto each stage's output.
        # The channel counts in the trailing comments assume dim == 64.
        self.conv1 = ConvBlock(input_dim, dim, 7, 1, 3, norm=norm, activation=activ, pad_type=pad_type)  # 3->64,concat
        dim = dim * 2
        self.conv2 = ConvBlock(dim, dim, 4, 2, 1, norm=norm, activation=activ, pad_type=pad_type)  # 128->128
        dim = dim * 2
        self.conv3 = ConvBlock(dim, dim, 4, 2, 1, norm=norm, activation=activ, pad_type=pad_type)  # 256->256
        dim = dim * 2
        self.conv4 = ConvBlock(dim, dim, 4, 2, 1, norm=norm, activation=activ, pad_type=pad_type)  # 512->512
        dim = dim * 2

        self.model = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),  # global average pooling
            nn.Conv2d(dim, style_dim, 1, 1, 0),
        )
        self.output_dim = dim

    def get_features(self, image, model, layers=None):
        """Run ``image`` through ``model`` child by child and collect selected outputs.

        Args:
            image: Input handed to the first child of ``model``.
            model: Module whose direct children are applied in order.
            layers: Mapping from child name to the key under which that
                child's output is stored. Defaults to four VGG conv layers.

        Returns:
            Dict mapping the chosen keys to the corresponding child outputs.
        """
        if layers is None:
            layers = {'0': 'conv1_1', '5': 'conv2_1', '10': 'conv3_1', '19': 'conv4_1'}
        features = {}
        x = image
        # model._modules is a dictionary holding each module in the model.
        # NOTE(review): every child is always run, even after the last
        # requested layer. Do not add an early exit without checking: if the
        # following child operates in place (torchvision's VGG appears to use
        # in-place ReLU - confirm), it changes the tensor already stored here.
        for name, layer in model._modules.items():
            x = layer(x)
            if name in layers:
                features[layers[name]] = x
        return features

    def componet_enc(self, x):
        """Compute the code for ``x``.

        Shape comments below come from the original notes and assume a
        batch of 16 three-channel 256x256 images with dim == 64.
        """
        # x [16,3,256,256]
        vgg_aux = self.get_features(x, self.vgg)  # x is a 3-channel grayscale image
        x = self.conv1(x)  # 64 256 256
        x = torch.cat([x, vgg_aux['conv1_1']], dim=1)  # 128 256 256
        x = self.conv2(x)  # 128 128 128
        x = torch.cat([x, vgg_aux['conv2_1']], dim=1)  # 256 128 128
        x = self.conv3(x)  # 256 64 64
        x = torch.cat([x, vgg_aux['conv3_1']], dim=1)  # 512 64 64
        x = self.conv4(x)  # 512 32 32
        x = torch.cat([x, vgg_aux['conv4_1']], dim=1)  # 1024 32 32
        x = self.model(x)
        return x

    def forward(self, x):
        """Return ``componet_enc(x)``."""
        code = self.componet_enc(x)
        return code

class RGBEncoder(nn.Module):
    """Sequential encoder: one input conv block, downsampling blocks, residual stack.

    Args:
        input_dim: Channel count passed to the first ``ConvBlock``.
        dim: Width of the first conv block; doubled after every downsampling block.
        n_downsample: Number of downsampling ``ConvBlock`` stages.
        n_res: Forwarded to ``ResBlocks``.
        norm: Normalisation name forwarded to every block.
        activ: Activation name forwarded to every block.
        pad_type: Padding name forwarded to every block.

    Attributes set:
        model: ``nn.Sequential`` holding all stages in order.
        output_dim: Final width, i.e. ``dim * 2 ** n_downsample``.
    """

    def __init__(self, input_dim, dim, n_downsample, n_res, norm, activ, pad_type):
        super().__init__()
        common = dict(norm=norm, activation=activ, pad_type=pad_type)

        channels = dim
        blocks = [ConvBlock(input_dim, channels, 7, 1, 3, **common)]
        # Each downsampling block doubles the channel width.
        for _ in range(n_downsample):
            blocks.append(ConvBlock(channels, 2 * channels, 4, 2, 1, **common))
            channels *= 2
        # Residual stack runs at the final width.
        blocks.append(ResBlocks(n_res, channels, **common))

        self.model = nn.Sequential(*blocks)
        self.output_dim = channels

    def forward(self, x):
        """Run ``x`` through the sequential stack."""
        features = self.model(x)
        return features


class RGBDecoder(nn.Module):
    """Decoder: four modulated residual pairs, two (upsample, conv) stages, output conv.

    Args:
        dim: Width used for the upsampling convs; halved after each of the two
            stages. The modulated convs are fixed at 256 channels regardless.
        output_dim: Channel count passed to the last ``ConvBlock``.
        n_upsample: Accepted but not read; two upsampling stages are always built.
        n_res: Accepted but not read.
        res_norm: Accepted but not read.
        activ: Activation name for the two upsampling convs.
        pad_type: Padding name forwarded to the ``ConvBlock`` stages.

    NOTE(review): eight ``ModulationConvBlock`` modules are registered
    (``mod_conv_1`` .. ``mod_conv_8``) but ``forward`` only calls
    ``mod_conv_1`` once and ``mod_conv_2`` seven times. This may be a
    copy-paste slip; it is kept as-is because changing it would alter the
    output of any checkpoint trained with this wiring - confirm before fixing.
    """

    def __init__(self, dim, output_dim, n_upsample, n_res, res_norm, activ='relu', pad_type='zero'):
        super().__init__()
        # Register mod_conv_1 .. mod_conv_8 in order, under the same
        # attribute names, so state_dict keys are unaffected.
        for index in range(1, 9):
            setattr(self, 'mod_conv_%d' % index, ModulationConvBlock(256, 256, 3))

        half = dim // 2
        quarter = half // 2

        self.upsample_block1 = nn.Upsample(scale_factor=2, mode='nearest')
        self.conv_1 = ConvBlock(dim, half, 5, 1, 2, norm='ln', activation=activ, pad_type=pad_type)

        self.upsample_block2 = nn.Upsample(scale_factor=2, mode='nearest')
        self.conv_2 = ConvBlock(half, quarter, 5, 1, 2, norm='ln', activation=activ, pad_type=pad_type)

        # Output block: no normalisation, tanh activation.
        self.conv_3 = ConvBlock(quarter, output_dim, 7, 1, 3, norm='none', activation='tanh', pad_type=pad_type)

    def forward(self, x, code):
        """Decode ``x`` using eight consecutive 256-column slices of ``code``.

        Args:
            x: Feature map handed to the first modulated conv.
            code: 2-D tensor indexed as ``code[:, a:b]``; columns 0..2047 are
                consumed in 256-wide slices, one per modulated conv call.

        Returns:
            Output of the final conv block.
        """
        chunk = 256
        for pair in range(4):
            skip = x
            start = 2 * pair * chunk
            # Only the very first call goes through mod_conv_1; every other
            # call reuses mod_conv_2 (see the class-level review note).
            first = self.mod_conv_1 if pair == 0 else self.mod_conv_2
            x = first(x, code[:, start:start + chunk])
            x = self.mod_conv_2(x, code[:, start + chunk:start + 2 * chunk])
            x += skip

        tail = (
            self.upsample_block1,
            self.conv_1,
            self.upsample_block2,
            self.conv_2,
            self.conv_3,
        )
        for stage in tail:
            x = stage(x)
        return x