Spaces:
Runtime error
Runtime error
# -*- coding: utf-8 -*- | |
import numpy as np | |
import paddle | |
from paddle import nn | |
import paddle.nn.functional as F | |
import paddleaudio | |
import paddleaudio.functional as audio_F | |
import random | |
## 1. Random time stretch
class TimeStrech(nn.Layer):
    """Rescale the last axis of the input by ``scale`` while keeping its length.

    The input is resized with bilinear interpolation along the last axis.
    A result that comes out shorter than the input is padded back to the
    original length with a randomly chosen slice of itself; a longer result
    is truncated.

    Args:
        scale: Multiplicative factor applied to the last axis.
    """

    def __init__(self, scale):
        super(TimeStrech, self).__init__()
        self.scale = scale

    def forward(self, x):
        """Return ``x`` resized along its last axis and restored to its length.

        Args:
            x: Input tensor. Presumably shaped (batch, 1, mel, time), since a
                two-element ``scale_factor`` is passed to ``F.interpolate``
                -- TODO confirm against callers.

        Returns:
            Tensor with a singleton axis at position 1 and a last axis of the
            same length as the input's.
        """
        mel_size = x.shape[-1]
        x = F.interpolate(x, scale_factor=(1, self.scale), align_corners=False,
                          mode='bilinear')
        # Drop only the channel axis.  An argument-less squeeze() would also
        # remove the batch axis when the batch size is 1, and the unsqueeze(1)
        # below would then put the new axis in the wrong place.
        x = x.squeeze(1)

        if x.shape[-1] < mel_size:
            # Too short: pad the tail with a random slice of the result.
            noise_length = mel_size - x.shape[-1]
            random_pos = random.randint(0, x.shape[-1]) - noise_length
            if random_pos < 0:
                random_pos = 0
            noise = x[..., random_pos:random_pos + noise_length]
            x = paddle.concat([x, noise], axis=-1)
        else:
            # Long enough: keep only the first mel_size entries.
            x = x[..., :mel_size]

        return x.unsqueeze(1)
## 2. PitchShift | |
class PitchShift(nn.Layer):
    """Resize axis 1 of a spectrogram by ``shift`` bins, keeping its size.

    The input is rescaled along axis 1 (after normalisation to three
    dimensions) with bilinear interpolation so that it would span
    ``mel_size + shift`` bins, then truncated or zero-padded back to
    ``mel_size`` bins.

    Args:
        shift: Number of bins to add (positive) or remove (negative) before
            the result is cropped or padded back to the original size.
    """

    def __init__(self, shift):
        super(PitchShift, self).__init__()
        self.shift = shift

    def forward(self, x):
        """Return the shifted spectrogram with a singleton axis at position 1.

        Args:
            x: Input tensor with 2, 3 or 4 dimensions.  Presumably
                (mel, time), (batch, mel, time) or (batch, 1, mel, time)
                -- TODO confirm against callers.

        Returns:
            A 4-D tensor whose axis 1 has size 1 and whose axis 2 has the
            same size as the working tensor's axis 1 before resizing.
        """
        # Normalise to three dimensions explicitly.  A blanket squeeze() would
        # undo the unsqueeze(0) for 2-D input and drop the batch axis when the
        # batch size is 1, making mel_size read the wrong axis.
        ndim = len(x.shape)
        if ndim == 2:
            x = x.unsqueeze(0)
        elif ndim == 4:
            x = x.squeeze(1)

        mel_size = x.shape[1]
        shift_scale = (mel_size + self.shift) / mel_size
        x = F.interpolate(x.unsqueeze(1), scale_factor=(shift_scale, 1.), align_corners=False,
                          mode='bilinear').squeeze(1)

        # Crop to the original size, then zero-pad if the resize shrank it.
        x = x[:, :mel_size]
        if x.shape[1] < mel_size:
            pad_size = mel_size - x.shape[1]
            # paddle.zeros takes the shape as one list; paddle.concat is the
            # concatenation function used by the rest of this file.
            padding = paddle.zeros([x.shape[0], pad_size, x.shape[2]], dtype=x.dtype)
            x = paddle.concat([x, padding], axis=1)

        return x.unsqueeze(1)
## 3. ShiftBias | |
class ShiftBias(nn.Layer):
    """Layer that adds a fixed offset to its input.

    Args:
        bias: Value added to the input on every forward pass.
    """

    def __init__(self, bias):
        super().__init__()
        self.bias = bias

    def forward(self, x):
        """Return ``x`` with ``self.bias`` added."""
        shifted = x + self.bias
        return shifted
## 4. Scaling | |
class SpectScaling(nn.Layer):
    """Layer that multiplies its input by a fixed factor.

    Args:
        scale: Factor the input is multiplied by on every forward pass.
    """

    def __init__(self, scale):
        super().__init__()
        self.scale = scale

    def forward(self, x):
        """Return ``x`` multiplied by ``self.scale``."""
        scaled = x * self.scale
        return scaled
## 5. Time Flip | |
class TimeFlip(nn.Layer):
    """Reverse a randomly placed window along the last axis of the input.

    Args:
        length: Window length; rounded to the nearest integer.  Windows of
            length 1 or less leave the input unchanged.
    """

    def __init__(self, length):
        super(TimeFlip, self).__init__()
        self.length = round(length)

    def forward(self, x):
        """Return a copy of ``x`` with one window of its last axis reversed.

        The window start is drawn from NumPy's global random state.  The input
        tensor itself is not modified; when no flip applies it is returned
        as is.
        """
        width = x.shape[-1]
        # Clamp the window to the axis length: a window longer than the axis
        # would otherwise hand randint a non-positive upper bound and raise.
        span = min(self.length, width)
        if span > 1:
            # randint's upper bound is exclusive, so +1 makes the last valid
            # start position (width - span) reachable.
            start = int(np.random.randint(0, width - span + 1))
            x_ret = x.clone()
            x_ret[..., start:start + span] = paddle.flip(x[..., start:start + span], axis=[-1])
            x = x_ret
        return x
class PhaseShuffle2D(nn.Layer):
    """Circularly rotate the last axis of a 4-D tensor by a random offset.

    Args:
        n: Maximum absolute offset; offsets are drawn uniformly from
            ``[-n, n]``.

    The offsets come from a private ``random.Random`` seeded with 1, so the
    sequence of offsets is the same for every freshly built instance.
    """

    def __init__(self, n=2):
        super(PhaseShuffle2D, self).__init__()
        self.n = n
        self.random = random.Random(1)

    def forward(self, x, move=None):
        """Return ``x`` rotated along axis 3 by ``move`` positions.

        Args:
            x: Tensor of shape (B, C, M, L).
            move: Offset to rotate by; drawn from the instance's own random
                generator when ``None``.

        Returns:
            ``x`` itself when no rotation applies, otherwise a new tensor
            whose last axis starts at index ``move`` of the input and wraps
            around.
        """
        # x.size = (B, C, M, L)
        if move is None:
            move = self.random.randint(-self.n, self.n)

        # An offset of L or more leaves one of the two slices below empty and
        # the other equal to the whole input, so the rotation is the identity.
        # Return early instead of concatenating a zero-size tensor.
        if move == 0 or abs(move) >= x.shape[3]:
            return x

        left = x[:, :, :, :move]
        right = x[:, :, :, move:]
        return paddle.concat([right, left], axis=3)
def build_transforms():
    """Build a randomly composed ``nn.Sequential`` of augmentation layers.

    One magnitude ``M`` is drawn from ``np.random.random()`` and shared by all
    layers.  Three factories are then sampled with ``np.random.choice`` (which
    samples with replacement by default, so a factory may appear more than
    once) and instantiated in the sampled order:

    * ``TimeStrech`` with scale ``1 + (r - 0.5) * M * 0.2``
    * ``SpectScaling`` with scale ``1 + (r - 1) * M * 0.1``
    * ``PhaseShuffle2D(192)``

    where ``r`` is a fresh ``np.random.random()`` draw per layer.

    Returns:
        An ``nn.Sequential`` holding as many layers as there are factories.
    """

    def _time_stretch(M):
        return TimeStrech(1 + (np.random.random() - 0.5) * M * 0.2)

    def _spect_scaling(M):
        return SpectScaling(1 + (np.random.random() - 1) * M * 0.1)

    def _phase_shuffle(M):
        return PhaseShuffle2D(192)

    factories = [_time_stretch, _spect_scaling, _phase_shuffle]

    # Keep the draw order: magnitude first, then the factory sample, then the
    # per-layer draws made while each factory is called.
    magnitude = np.random.random()
    picked = np.random.choice(factories, len(factories))

    layers = []
    for make_layer in picked:
        layers.append(make_layer(magnitude))
    return nn.Sequential(*layers)