# @package __global__

# Default configuration for the `compression` solver.
#
# `???` marks a mandatory value that must be supplied by another config or
# an override; `${name}` is an interpolation of another config key.
#
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation had been flattened -- confirm against the upstream config.

defaults:
  - ../default
  - override /dset: audio/default
  - _self_

solver: compression
sample_rate: ???
channels: ???

# loss balancing
losses:
  adv: 4.
  feat: 4.
  l1: 0.1
  mel: 0.
  msspec: 2.
  sisnr: 0.
balancer:
  balance_grads: true
  ema_decay: 0.999
  per_batch_item: true
  total_norm: 1.

adversarial:
  every: 1
  adversaries: [msstftd]
  adv_loss: hinge
  feat_loss: l1

# losses hyperparameters
l1: {}
l2: {}
mrstft:
  factor_sc: .5
  factor_mag: .5
  normalized: false
mel:
  sample_rate: ${sample_rate}
  n_fft: 1024
  hop_length: 256
  win_length: 1024
  n_mels: 64
  f_min: 64
  f_max: null
  normalized: false
  floor_level: 1e-5
sisnr:
  sample_rate: ${sample_rate}
  segment: 5.
msspec:
  sample_rate: ${sample_rate}
  range_start: 6
  range_end: 11
  n_mels: 64
  f_min: 64
  f_max: null
  normalized: true
  alphas: false
  floor_level: 1e-5

# metrics
metrics:
  visqol:
    mode: audio
    bin: null  # path to visqol install
    model: tcdaudio14_aacvopus_coresv_svrnsim_n.68_g.01_c1.model  # visqol v3

# adversaries hyperparameters
msstftd:
  in_channels: 1
  out_channels: 1
  filters: 32
  norm: weight_norm
  n_ffts: [1024, 2048, 512, 256, 128]
  hop_lengths: [256, 512, 128, 64, 32]
  win_lengths: [1024, 2048, 512, 256, 128]
  activation: LeakyReLU
  activation_params: {negative_slope: 0.3}
msd:
  in_channels: 1
  out_channels: 1
  scale_norms: [spectral_norm, weight_norm, weight_norm]
  kernel_sizes: [5, 3]
  filters: 16
  max_filters: 1024
  downsample_scales: [4, 4, 4, 4]
  inner_kernel_sizes: null
  groups: [4, 4, 4, 4]
  strides: null
  paddings: null
  activation: LeakyReLU
  activation_params: {negative_slope: 0.3}
mpd:
  in_channels: 1
  out_channels: 1
  periods: [2, 3, 5, 7, 11]
  n_layers: 5
  kernel_size: 5
  stride: 3
  filters: 8
  filter_scales: 4
  max_filters: 1024
  activation: LeakyReLU
  activation_params: {negative_slope: 0.3}
  norm: weight_norm

# data hyperparameters
dataset:
  batch_size: 64
  num_workers: 10
  segment_duration: 1
  train:
    num_samples: 500000
  valid:
    num_samples: 10000
  evaluate:
    batch_size: 32
    num_samples: 10000
  generate:
    batch_size: 32
    num_samples: 50
    segment_duration: 10

# solver hyperparameters
evaluate:
  every: 25
  num_workers: 5
  metrics:
    visqol: false
    sisnr: true
generate:
  every: 25
  num_workers: 5
  audio:
    sample_rate: ${sample_rate}

# checkpointing schedule
checkpoint:
  save_last: true
  save_every: 25
  keep_last: 10
  keep_every_states: null

# optimization hyperparameters
optim:
  epochs: 200
  updates_per_epoch: 2000
  lr: 3e-4
  max_norm: 0.
  optimizer: adam
  adam:
    betas: [0.5, 0.9]
    weight_decay: 0.
  ema:
    use: true  # whether to use EMA or not
    updates: 1  # update at every step
    device: ${device}  # device for EMA, can be put on GPU if more frequent updates
    decay: 0.99  # EMA decay value, if null, no EMA is used