---
# Training configuration: generator, discriminators, optimizers,
# learning-rate schedulers and loss criteria.
#
# NOTE(review): the original file had lost all line breaks and indentation.
# The nesting below was reconstructed from key order and the repeated
# `name` / `config` pattern; confirm against the code that loads this file.

generator:
  name: SoundStream
  config:
    n_filters: 32
    D: 256
    target_bandwidths:
      - 0.5
      - 1
      - 1.5
      - 2
      - 4
    ratios:
      - 8
      - 5
      - 4
      - 2
    # NOTE(review): sample_rate, bins and semantic_techer are assumed to
    # belong to generator.config (they follow `ratios` directly) -- confirm.
    sample_rate: 16000
    bins: 1024
    # NOTE(review): key is spelled `semantic_techer` (sic). Left unchanged
    # because the consumer presumably looks it up by this exact name.
    semantic_techer: wavlm_base_plus

# Names of discriminator sections defined in this file.
# NOTE(review): only `mfd` is listed; `mpd` and `msd` are defined below but
# not referenced here -- presumably they are inactive; verify in the loader.
d_list:
  - mfd

mfd:
  name: MultiFrequencyDiscriminator
  config:
    hop_lengths:
      - 32
      - 64
      - 128
      - 256
      - 512
      - 1024
    # Same number of entries as hop_lengths (six each).
    hidden_channels:
      - 64
      - 128
      - 256
      - 512
      - 512
      - 512
    domain: double
    mel_scale: true
    sample_rate: 16000

mpd:
  name: MultiPeriodDiscriminator
  config:
    period_sizes:
      - 2
      - 3
      - 5
      - 7
      - 11
    period_kernel_size: 5

msd:
  name: MultiScaleDiscriminator
  config:
    num_scales: 3
    pool_kernel_size: 4
    pool_stride: 2

# `g` and `d` presumably stand for generator and discriminator -- confirm.
optimizer:
  g:
    name: AdamW
    config:
      lr: 0.0002
      betas:
        - 0.8
        - 0.99
      eps: 1.0e-06
  d:
    name: AdamW
    config:
      lr: 0.0002
      betas:
        - 0.8
        - 0.99
      eps: 1.0e-06

lr_scheduler:
  g:
    name: ExponentialLR
    config:
      gamma: 0.999
  d:
    name: ExponentialLR
    config:
      gamma: 0.999

criterion:
  g_criterion:
    name: losses.generator_loss.GeneratorSTFTLoss
    config:
      # NOTE(review): use_mel_loss is false while mel_loss_weight and
      # mel_scale_loss are still set; presumably they are ignored -- verify.
      use_mel_loss: false
      adv_criterion: MSEGLoss
      mel_loss_weight: 45
      use_feature_match: true
      feat_match_loss_weight: 20
      use_full_stft_loss: true
      use_sub_stft_loss: true
      full_stft_loss_weight: 1
      sub_stft_loss_weight: 1
      mel_scale_loss:
        sampling_rate: 16000
        n_fft: 1024
        num_mels: 80
        hop_size: 160
        win_size: 800
        fmin: 0
      full_multi_scale_stft_loss:
        fft_sizes:
          - 512
          - 1024
          - 2048
        win_sizes:
          - 480
          - 960
          - 1200
        hop_sizes:
          - 120
          - 240
          - 300
      sub_multi_scale_stft_loss:
        num_bands: 6
        fft_sizes:
          - 128
          - 256
          - 256
        win_sizes:
          - 80
          - 120
          - 200
        hop_sizes:
          - 20
          - 40
          - 50
  d_criterion:
    name: losses.discriminator_loss.MSEDiscriminatorLoss
    config: null

# NOTE(review): these three keys are assumed to be top-level (they follow
# `config: null`, which closes d_criterion); confirm they are not meant to
# live under `criterion`.
commit_loss_weight: 1.0
codebook_loss_weight: 100
audio_norm_scale: 0.95