AbeShinzo0708
/

ESPnet_VITS_AbeShinzo

Japanese

AbeShinzo

FormerJapanesePrimeMinister

ESPnet

VITS

Model card | Files and versions | Community

AbeShinzo0708 committed on Apr 21, 2023

Commit

0750aaf

•

1 Parent(s): 6648067

Delete config_abe.yaml

Browse files

Files changed (1) hide show

config_abe.yaml +0 -401

config_abe.yaml DELETED Viewed

@@ -1,401 +0,0 @@
-config: ./conf/tuning/finetune_full_band_vits.yaml
-print_config: false
-log_level: INFO
-dry_run: false
-iterator_type: sequence
-output_dir: exp/tts_full_band_vits
-ngpu: 1
-seed: 777
-num_workers: 4
-num_att_plot: 3
-dist_backend: nccl
-dist_init_method: env://
-dist_world_size: null
-dist_rank: null
-local_rank: 0
-dist_master_addr: null
-dist_master_port: null
-dist_launcher: null
-multiprocessing_distributed: false
-unused_parameters: true
-sharded_ddp: false
-cudnn_enabled: true
-cudnn_benchmark: false
-cudnn_deterministic: false
-collect_stats: false
-write_collected_feats: false
-max_epoch: 100
-patience: null
-val_scheduler_criterion:
-- valid
-- loss
-early_stopping_criterion:
-- valid
-- loss
-- min
-best_model_criterion:
--   - train
-    - total_count
-    - max
-keep_nbest_models: 10
-nbest_averaging_interval: 0
-grad_clip: -1
-grad_clip_type: 2.0
-grad_noise: false
-accum_grad: 1
-no_forward_run: false
-resume: true
-train_dtype: float32
-use_amp: false
-log_interval: 50
-use_matplotlib: true
-use_tensorboard: true
-create_graph_in_tensorboard: false
-use_wandb: false
-wandb_project: null
-wandb_id: null
-wandb_entity: null
-wandb_name: null
-wandb_model_log_interval: -1
-detect_anomaly: false
-pretrain_path: null
-init_param:
-- downloads/full_band_vits_accent_with_pause_pretrain/exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/train.total_count.ave_10best.pth:tts:tts
-ignore_init_mismatch: false
-freeze_param: []
-num_iters_per_epoch: 1000
-batch_size: 20
-valid_batch_size: null
-batch_bins: 100000
-valid_batch_bins: null
-train_shape_file:
-- exp/tts_stats_raw_linear_spectrogram_phn_jaconv_pyopenjtalk_accent_with_pause/train/text_shape.phn
-- exp/tts_stats_raw_linear_spectrogram_phn_jaconv_pyopenjtalk_accent_with_pause/train/speech_shape
-valid_shape_file:
-- exp/tts_stats_raw_linear_spectrogram_phn_jaconv_pyopenjtalk_accent_with_pause/valid/text_shape.phn
-- exp/tts_stats_raw_linear_spectrogram_phn_jaconv_pyopenjtalk_accent_with_pause/valid/speech_shape
-batch_type: numel
-valid_batch_type: null
-fold_length:
-- 150
-- 409600
-sort_in_batch: descending
-sort_batch: descending
-multiple_iterator: false
-chunk_length: 500
-chunk_shift_ratio: 0.5
-num_cache_chunks: 1024
-chunk_excluded_key_prefixes: []
-train_data_path_and_name_and_type:
--   - dump/44k/raw/tr_no_dev/text
-    - text
-    - text
--   - dump/44k/raw/tr_no_dev/wav.scp
-    - speech
-    - sound
-valid_data_path_and_name_and_type:
--   - dump/44k/raw/dev/text
-    - text
-    - text
--   - dump/44k/raw/dev/wav.scp
-    - speech
-    - sound
-allow_variable_data_keys: false
-max_cache_size: 0.0
-max_cache_fd: 32
-valid_max_cache_size: null
-exclude_weight_decay: false
-exclude_weight_decay_conf: {}
-optim: adamw
-optim_conf:
-    lr: 0.0001
-    betas:
-    - 0.8
-    - 0.99
-    eps: 1.0e-09
-    weight_decay: 0.0
-scheduler: exponentiallr
-scheduler_conf:
-    gamma: 0.999875
-optim2: adamw
-optim2_conf:
-    lr: 0.0001
-    betas:
-    - 0.8
-    - 0.99
-    eps: 1.0e-09
-    weight_decay: 0.0
-scheduler2: exponentiallr
-scheduler2_conf:
-    gamma: 0.999875
-generator_first: false
-token_list:
-- <blank>
-- <unk>
-- '1'
-- '2'
-- '0'
-- '3'
-- '4'
-- '-1'
-- '5'
-- a
-- o
-- '-2'
-- i
-- '-3'
-- u
-- e
-- k
-- n
-- t
-- '6'
-- r
-- '-4'
-- s
-- N
-- m
-- pau
-- '7'
-- sh
-- d
-- g
-- w
-- '8'
-- U
-- '-5'
-- I
-- cl
-- h
-- y
-- b
-- '9'
-- j
-- ts
-- ch
-- '-6'
-- z
-- p
-- '-7'
-- f
-- ky
-- ry
-- '-8'
-- gy
-- '-9'
-- hy
-- ny
-- '-10'
-- by
-- my
-- '-11'
-- '-12'
-- '-13'
-- py
-- '-14'
-- '-15'
-- v
-- '10'
-- '-16'
-- '-17'
-- '11'
-- '-21'
-- '-20'
-- '12'
-- '-19'
-- '13'
-- '-18'
-- '14'
-- dy
-- '15'
-- ty
-- '-22'
-- '16'
-- '18'
-- '19'
-- '17'
-- <sos/eos>
-odim: null
-model_conf: {}
-use_preprocessor: true
-token_type: phn
-bpemodel: null
-non_linguistic_symbols: null
-cleaner: jaconv
-g2p: pyopenjtalk_accent_with_pause
-feats_extract: linear_spectrogram
-feats_extract_conf:
-    n_fft: 2048
-    hop_length: 512
-    win_length: null
-normalize: null
-normalize_conf: {}
-tts: vits
-tts_conf:
-    generator_type: vits_generator
-    generator_params:
-        hidden_channels: 192
-        spks: -1
-        global_channels: -1
-        segment_size: 32
-        text_encoder_attention_heads: 2
-        text_encoder_ffn_expand: 4
-        text_encoder_blocks: 6
-        text_encoder_positionwise_layer_type: conv1d
-        text_encoder_positionwise_conv_kernel_size: 3
-        text_encoder_positional_encoding_layer_type: rel_pos
-        text_encoder_self_attention_layer_type: rel_selfattn
-        text_encoder_activation_type: swish
-        text_encoder_normalize_before: true
-        text_encoder_dropout_rate: 0.1
-        text_encoder_positional_dropout_rate: 0.0
-        text_encoder_attention_dropout_rate: 0.1
-        use_macaron_style_in_text_encoder: true
-        use_conformer_conv_in_text_encoder: false
-        text_encoder_conformer_kernel_size: -1
-        decoder_kernel_size: 7
-        decoder_channels: 512
-        decoder_upsample_scales:
-        - 8
-        - 8
-        - 2
-        - 2
-        - 2
-        decoder_upsample_kernel_sizes:
-        - 16
-        - 16
-        - 4
-        - 4
-        - 4
-        decoder_resblock_kernel_sizes:
-        - 3
-        - 7
-        - 11
-        decoder_resblock_dilations:
-        -   - 1
-            - 3
-            - 5
-        -   - 1
-            - 3
-            - 5
-        -   - 1
-            - 3
-            - 5
-        use_weight_norm_in_decoder: true
-        posterior_encoder_kernel_size: 5
-        posterior_encoder_layers: 16
-        posterior_encoder_stacks: 1
-        posterior_encoder_base_dilation: 1
-        posterior_encoder_dropout_rate: 0.0
-        use_weight_norm_in_posterior_encoder: true
-        flow_flows: 4
-        flow_kernel_size: 5
-        flow_base_dilation: 1
-        flow_layers: 4
-        flow_dropout_rate: 0.0
-        use_weight_norm_in_flow: true
-        use_only_mean_in_flow: true
-        stochastic_duration_predictor_kernel_size: 3
-        stochastic_duration_predictor_dropout_rate: 0.5
-        stochastic_duration_predictor_flows: 4
-        stochastic_duration_predictor_dds_conv_layers: 3
-        vocabs: 85
-        aux_channels: 1025
-    discriminator_type: hifigan_multi_scale_multi_period_discriminator
-    discriminator_params:
-        scales: 1
-        scale_downsample_pooling: AvgPool1d
-        scale_downsample_pooling_params:
-            kernel_size: 4
-            stride: 2
-            padding: 2
-        scale_discriminator_params:
-            in_channels: 1
-            out_channels: 1
-            kernel_sizes:
-            - 15
-            - 41
-            - 5
-            - 3
-            channels: 128
-            max_downsample_channels: 1024
-            max_groups: 16
-            bias: true
-            downsample_scales:
-            - 2
-            - 2
-            - 4
-            - 4
-            - 1
-            nonlinear_activation: LeakyReLU
-            nonlinear_activation_params:
-                negative_slope: 0.1
-            use_weight_norm: true
-            use_spectral_norm: false
-        follow_official_norm: false
-        periods:
-        - 2
-        - 3
-        - 5
-        - 7
-        - 11
-        period_discriminator_params:
-            in_channels: 1
-            out_channels: 1
-            kernel_sizes:
-            - 5
-            - 3
-            channels: 32
-            downsample_scales:
-            - 3
-            - 3
-            - 3
-            - 3
-            - 1
-            max_downsample_channels: 1024
-            bias: true
-            nonlinear_activation: LeakyReLU
-            nonlinear_activation_params:
-                negative_slope: 0.1
-            use_weight_norm: true
-            use_spectral_norm: false
-    generator_adv_loss_params:
-        average_by_discriminators: false
-        loss_type: mse
-    discriminator_adv_loss_params:
-        average_by_discriminators: false
-        loss_type: mse
-    feat_match_loss_params:
-        average_by_discriminators: false
-        average_by_layers: false
-        include_final_outputs: true
-    mel_loss_params:
-        fs: 44100
-        n_fft: 2048
-        hop_length: 512
-        win_length: null
-        window: hann
-        n_mels: 80
-        fmin: 0
-        fmax: null
-        log_base: null
-    lambda_adv: 1.0
-    lambda_mel: 45.0
-    lambda_feat_match: 2.0
-    lambda_dur: 1.0
-    lambda_kl: 1.0
-    sampling_rate: 44100
-    cache_generator_outputs: true
-pitch_extract: null
-pitch_extract_conf: {}
-pitch_normalize: null
-pitch_normalize_conf: {}
-energy_extract: null
-energy_extract_conf: {}
-energy_normalize: null
-energy_normalize_conf: {}
-required:
-- output_dir
-- token_list
-version: '202301'
-distributed: false