# Spaces:
# Runtime error
# Runtime error
import copy

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from librosa.filters import mel
from scipy import signal
from scipy.signal import get_window
def butter_highpass(cutoff, fs, order=5):
    """Design a digital Butterworth high-pass filter.

    Args:
        cutoff: Cutoff frequency, in the same units as ``fs``.
        fs: Sampling rate of the signal the filter will be applied to.
        order: Filter order (default 5).

    Returns:
        Tuple ``(b, a)``: numerator and denominator coefficient arrays as
        returned by ``scipy.signal.butter``.
    """
    # scipy.signal.butter takes the critical frequency as a fraction of the
    # Nyquist rate when no ``fs`` argument is given.
    nyq = 0.5 * fs
    normal_cutoff = cutoff / nyq
    b, a = signal.butter(order, normal_cutoff, btype='high', analog=False)
    return b, a
def pySTFT(x, fft_length=1024, hop_length=256):
    """Compute the magnitude short-time Fourier transform of ``x``.

    The signal is reflect-padded by ``fft_length // 2`` samples on each side,
    cut into overlapping frames of ``fft_length`` samples spaced
    ``hop_length`` apart, multiplied by a periodic Hann window and
    transformed with a real FFT.

    Args:
        x: NumPy array holding the signal; frames are taken along the last
            axis. (The code is written around a 1-D input; ``np.pad`` with a
            scalar width pads every axis.)
        fft_length: Frame / FFT size in samples.
        hop_length: Step between the starts of consecutive frames.

    Returns:
        Non-negative array of magnitudes. For 1-D ``x`` the shape is
        ``(fft_length // 2 + 1, n_frames)`` with
        ``n_frames = (len(x_padded) - (fft_length - hop_length)) // hop_length``.
    """
    x = np.pad(x, int(fft_length // 2), mode='reflect')

    noverlap = fft_length - hop_length
    n_frames = (x.shape[-1] - noverlap) // hop_length
    shape = x.shape[:-1] + (n_frames, fft_length)
    strides = x.strides[:-1] + (hop_length * x.strides[-1], x.strides[-1])
    # Zero-copy framing: overlapping frames alias the same memory, so the
    # view is made read-only to rule out accidental writes through it.
    frames = np.lib.stride_tricks.as_strided(
        x, shape=shape, strides=strides, writeable=False)

    fft_window = get_window('hann', fft_length, fftbins=True)
    # The multiplication allocates a fresh array, so the view is only read.
    result = np.fft.rfft(fft_window * frames, n=fft_length).T
    return np.abs(result)
class LinearNorm(torch.nn.Module):
    """``torch.nn.Linear`` wrapper with Xavier-uniform weight initialisation.

    Args:
        in_dim: Size of each input feature vector.
        out_dim: Size of each output feature vector.
        bias: Whether the linear layer has a learnable bias.
        w_init_gain: Nonlinearity name passed to
            ``torch.nn.init.calculate_gain`` to scale the initialisation.
    """

    def __init__(self, in_dim, out_dim, bias=True, w_init_gain='linear'):
        super(LinearNorm, self).__init__()
        self.linear_layer = torch.nn.Linear(in_dim, out_dim, bias=bias)

        # Only the weight is re-initialised; the bias keeps nn.Linear's default.
        torch.nn.init.xavier_uniform_(
            self.linear_layer.weight,
            gain=torch.nn.init.calculate_gain(w_init_gain))

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        return self.linear_layer(x)
class ConvNorm(torch.nn.Module):
    """``torch.nn.Conv1d`` wrapper with Xavier-uniform weight initialisation.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        kernel_size: Convolution kernel width.
        stride: Convolution stride.
        padding: Zero padding added to both sides. When ``None`` it is set to
            ``dilation * (kernel_size - 1) // 2``, which requires an odd
            ``kernel_size``.
        dilation: Spacing between kernel taps.
        bias: Whether the convolution has a learnable bias.
        w_init_gain: Nonlinearity name passed to
            ``torch.nn.init.calculate_gain`` to scale the initialisation.
    """

    def __init__(self, in_channels, out_channels, kernel_size=1, stride=1,
                 padding=None, dilation=1, bias=True, w_init_gain='linear'):
        super(ConvNorm, self).__init__()
        if padding is None:
            assert kernel_size % 2 == 1, (
                'kernel_size must be odd when padding is not given')
            # (kernel_size - 1) is even here, so integer division is exact
            # and avoids the float round-trip of `int(... / 2)`.
            padding = dilation * (kernel_size - 1) // 2

        self.conv = torch.nn.Conv1d(in_channels, out_channels,
                                    kernel_size=kernel_size, stride=stride,
                                    padding=padding, dilation=dilation,
                                    bias=bias)

        # Only the weight is re-initialised; the bias keeps Conv1d's default.
        torch.nn.init.xavier_uniform_(
            self.conv.weight, gain=torch.nn.init.calculate_gain(w_init_gain))

    def forward(self, signal):
        """Apply the 1-D convolution to ``signal`` and return the result.

        NOTE: the parameter name shadows the module-level ``scipy.signal``
        import inside this method; it is kept for keyword-call compatibility.
        """
        conv_signal = self.conv(signal)
        return conv_signal
def filter_bank_mean(num_rep, codes_mask, max_len_long):
    '''
    Build a mean-pooling filterbank from per-code repetition counts.

    Code ``l`` of each batch item covers the 1-based output frames ``i`` with
    ``floor(cumsum[l-1]) < i <= ceil(cumsum[l])``, where ``cumsum`` is the
    running sum of the masked ``num_rep``. Each row holds ``1 / n`` on the
    ``n`` frames it covers and zero elsewhere; rows covering no frame are
    all zero.

    num_rep (B, L)     repetitions per code (may be fractional)
    codes_mask (B, L)  1 for valid codes, 0 for padding
    max_len_long       number of output frames
    output: filterbank (B, L, max_len_long)
    zero pad in codes must be real zero
    '''
    num_rep = num_rep.unsqueeze(-1)  # (B, L, 1)
    codes_mask = codes_mask.unsqueeze(-1)  # (B, L, 1)
    num_rep = num_rep * codes_mask

    # Segment boundaries: each code starts where the previous one ended.
    right_edge = num_rep.cumsum(dim=1)
    left_edge = torch.zeros_like(right_edge)
    left_edge[:, 1:, :] = right_edge[:, :-1, :]
    # Round outwards so fractional repetitions still claim whole frames.
    right_edge = right_edge.ceil()
    left_edge = left_edge.floor()

    # 1-based frame indices, broadcast against the (B, L, 1) edges.
    index = torch.arange(1, max_len_long + 1, device=num_rep.device).view(1, 1, -1)

    # mean pooling: indicator of the frames inside (left_edge, right_edge]
    fb = ((index > left_edge) & (index <= right_edge)).float()

    # Empty rows have a zero count; clamp so the division leaves them at 0.
    norm = fb.sum(dim=-1, keepdim=True).clamp(min=1.0)
    fb = fb / norm
    return fb * codes_mask