---
# Training configuration: a SoundStream generator, its discriminators,
# optimizers, LR schedulers and loss criteria.
#
# NOTE(review): the nesting in this file was reconstructed from key order and
# the repeated `name:` / `config:` pattern. Confirm it against the code that
# loads this file before relying on it.

generator:
  name: SoundStream
  config:
    n_filters: 32
    D: 256
    target_bandwidths:
      - 0.5
      - 1
      - 1.5
      - 2
      - 4
    # Product of the ratios is 8 * 5 * 4 * 2 = 320.
    ratios:
      - 8
      - 5
      - 4
      - 2
    sample_rate: 16000
    bins: 1024
    # NOTE(review): the key is spelled `semantic_techer` (not "teacher").
    # It is kept unchanged because the consumer presumably looks it up by
    # this exact name; confirm before renaming.
    semantic_techer: wavlm_base_plus

# Only `mfd` is listed here; `mpd` and `msd` are defined below but not listed.
# Presumably d_list selects which discriminator sections are used; confirm.
d_list:
  - mfd

mfd:
  name: MultiFrequencyDiscriminator
  config:
    # hop_lengths and hidden_channels both have six entries.
    hop_lengths:
      - 32
      - 64
      - 128
      - 256
      - 512
      - 1024
    hidden_channels:
      - 64
      - 128
      - 256
      - 512
      - 512
      - 512
    domain: double
    mel_scale: true
    sample_rate: 16000

mpd:
  name: MultiPeriodDiscriminator
  config:
    period_sizes:
      - 2
      - 3
      - 5
      - 7
      - 11
    period_kernel_size: 5

msd:
  name: MultiScaleDiscriminator
  config:
    num_scales: 3
    pool_kernel_size: 4
    pool_stride: 2

# `g` and `d` carry identical settings in both sections below.
# Presumably g = generator and d = discriminator; confirm.
optimizer:
  g:
    name: AdamW
    config:
      lr: 0.0002
      betas:
        - 0.8
        - 0.99
      eps: 1.0e-06
  d:
    name: AdamW
    config:
      lr: 0.0002
      betas:
        - 0.8
        - 0.99
      eps: 1.0e-06

lr_scheduler:
  g:
    name: ExponentialLR
    config:
      gamma: 0.999
  d:
    name: ExponentialLR
    config:
      gamma: 0.999

criterion:
  g_criterion:
    name: losses.generator_loss.GeneratorSTFTLoss
    config:
      # use_mel_loss is false, so mel_loss_weight and mel_scale_loss
      # presumably have no effect with these settings; confirm.
      use_mel_loss: false
      adv_criterion: MSEGLoss
      mel_loss_weight: 45
      use_feature_match: true
      feat_match_loss_weight: 20
      use_full_stft_loss: true
      use_sub_stft_loss: true
      full_stft_loss_weight: 1
      sub_stft_loss_weight: 1
      mel_scale_loss:
        sampling_rate: 16000
        n_fft: 1024
        num_mels: 80
        hop_size: 160
        win_size: 800
        fmin: 0
      # Each *_sizes list below has three entries, one per STFT resolution.
      full_multi_scale_stft_loss:
        fft_sizes:
          - 512
          - 1024
          - 2048
        win_sizes:
          - 480
          - 960
          - 1200
        hop_sizes:
          - 120
          - 240
          - 300
      sub_multi_scale_stft_loss:
        num_bands: 6
        fft_sizes:
          - 128
          - 256
          - 256
        win_sizes:
          - 80
          - 120
          - 200
        hop_sizes:
          - 20
          - 40
          - 50
  d_criterion:
    name: losses.discriminator_loss.MSEDiscriminatorLoss
    config: null
  # NOTE(review): placing the two loss weights under `criterion`, and
  # `audio_norm_scale` at top level, is an inference from their names.
  # Verify against the training code.
  commit_loss_weight: 1.0
  codebook_loss_weight: 100

audio_norm_scale: 0.95
|