AbeShinzo0708 committed on
Commit
0750aaf
1 Parent(s): 6648067

Delete config_abe.yaml

Browse files
Files changed (1) hide show
  1. config_abe.yaml +0 -401
config_abe.yaml DELETED
@@ -1,401 +0,0 @@
1
- config: ./conf/tuning/finetune_full_band_vits.yaml
2
- print_config: false
3
- log_level: INFO
4
- dry_run: false
5
- iterator_type: sequence
6
- output_dir: exp/tts_full_band_vits
7
- ngpu: 1
8
- seed: 777
9
- num_workers: 4
10
- num_att_plot: 3
11
- dist_backend: nccl
12
- dist_init_method: env://
13
- dist_world_size: null
14
- dist_rank: null
15
- local_rank: 0
16
- dist_master_addr: null
17
- dist_master_port: null
18
- dist_launcher: null
19
- multiprocessing_distributed: false
20
- unused_parameters: true
21
- sharded_ddp: false
22
- cudnn_enabled: true
23
- cudnn_benchmark: false
24
- cudnn_deterministic: false
25
- collect_stats: false
26
- write_collected_feats: false
27
- max_epoch: 100
28
- patience: null
29
- val_scheduler_criterion:
30
- - valid
31
- - loss
32
- early_stopping_criterion:
33
- - valid
34
- - loss
35
- - min
36
- best_model_criterion:
37
- - - train
38
- - total_count
39
- - max
40
- keep_nbest_models: 10
41
- nbest_averaging_interval: 0
42
- grad_clip: -1
43
- grad_clip_type: 2.0
44
- grad_noise: false
45
- accum_grad: 1
46
- no_forward_run: false
47
- resume: true
48
- train_dtype: float32
49
- use_amp: false
50
- log_interval: 50
51
- use_matplotlib: true
52
- use_tensorboard: true
53
- create_graph_in_tensorboard: false
54
- use_wandb: false
55
- wandb_project: null
56
- wandb_id: null
57
- wandb_entity: null
58
- wandb_name: null
59
- wandb_model_log_interval: -1
60
- detect_anomaly: false
61
- pretrain_path: null
62
- init_param:
63
- - downloads/full_band_vits_accent_with_pause_pretrain/exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/train.total_count.ave_10best.pth:tts:tts
64
- ignore_init_mismatch: false
65
- freeze_param: []
66
- num_iters_per_epoch: 1000
67
- batch_size: 20
68
- valid_batch_size: null
69
- batch_bins: 100000
70
- valid_batch_bins: null
71
- train_shape_file:
72
- - exp/tts_stats_raw_linear_spectrogram_phn_jaconv_pyopenjtalk_accent_with_pause/train/text_shape.phn
73
- - exp/tts_stats_raw_linear_spectrogram_phn_jaconv_pyopenjtalk_accent_with_pause/train/speech_shape
74
- valid_shape_file:
75
- - exp/tts_stats_raw_linear_spectrogram_phn_jaconv_pyopenjtalk_accent_with_pause/valid/text_shape.phn
76
- - exp/tts_stats_raw_linear_spectrogram_phn_jaconv_pyopenjtalk_accent_with_pause/valid/speech_shape
77
- batch_type: numel
78
- valid_batch_type: null
79
- fold_length:
80
- - 150
81
- - 409600
82
- sort_in_batch: descending
83
- sort_batch: descending
84
- multiple_iterator: false
85
- chunk_length: 500
86
- chunk_shift_ratio: 0.5
87
- num_cache_chunks: 1024
88
- chunk_excluded_key_prefixes: []
89
- train_data_path_and_name_and_type:
90
- - - dump/44k/raw/tr_no_dev/text
91
- - text
92
- - text
93
- - - dump/44k/raw/tr_no_dev/wav.scp
94
- - speech
95
- - sound
96
- valid_data_path_and_name_and_type:
97
- - - dump/44k/raw/dev/text
98
- - text
99
- - text
100
- - - dump/44k/raw/dev/wav.scp
101
- - speech
102
- - sound
103
- allow_variable_data_keys: false
104
- max_cache_size: 0.0
105
- max_cache_fd: 32
106
- valid_max_cache_size: null
107
- exclude_weight_decay: false
108
- exclude_weight_decay_conf: {}
109
- optim: adamw
110
- optim_conf:
111
- lr: 0.0001
112
- betas:
113
- - 0.8
114
- - 0.99
115
- eps: 1.0e-09
116
- weight_decay: 0.0
117
- scheduler: exponentiallr
118
- scheduler_conf:
119
- gamma: 0.999875
120
- optim2: adamw
121
- optim2_conf:
122
- lr: 0.0001
123
- betas:
124
- - 0.8
125
- - 0.99
126
- eps: 1.0e-09
127
- weight_decay: 0.0
128
- scheduler2: exponentiallr
129
- scheduler2_conf:
130
- gamma: 0.999875
131
- generator_first: false
132
- token_list:
133
- - <blank>
134
- - <unk>
135
- - '1'
136
- - '2'
137
- - '0'
138
- - '3'
139
- - '4'
140
- - '-1'
141
- - '5'
142
- - a
143
- - o
144
- - '-2'
145
- - i
146
- - '-3'
147
- - u
148
- - e
149
- - k
150
- - n
151
- - t
152
- - '6'
153
- - r
154
- - '-4'
155
- - s
156
- - N
157
- - m
158
- - pau
159
- - '7'
160
- - sh
161
- - d
162
- - g
163
- - w
164
- - '8'
165
- - U
166
- - '-5'
167
- - I
168
- - cl
169
- - h
170
- - y
171
- - b
172
- - '9'
173
- - j
174
- - ts
175
- - ch
176
- - '-6'
177
- - z
178
- - p
179
- - '-7'
180
- - f
181
- - ky
182
- - ry
183
- - '-8'
184
- - gy
185
- - '-9'
186
- - hy
187
- - ny
188
- - '-10'
189
- - by
190
- - my
191
- - '-11'
192
- - '-12'
193
- - '-13'
194
- - py
195
- - '-14'
196
- - '-15'
197
- - v
198
- - '10'
199
- - '-16'
200
- - '-17'
201
- - '11'
202
- - '-21'
203
- - '-20'
204
- - '12'
205
- - '-19'
206
- - '13'
207
- - '-18'
208
- - '14'
209
- - dy
210
- - '15'
211
- - ty
212
- - '-22'
213
- - '16'
214
- - '18'
215
- - '19'
216
- - '17'
217
- - <sos/eos>
218
- odim: null
219
- model_conf: {}
220
- use_preprocessor: true
221
- token_type: phn
222
- bpemodel: null
223
- non_linguistic_symbols: null
224
- cleaner: jaconv
225
- g2p: pyopenjtalk_accent_with_pause
226
- feats_extract: linear_spectrogram
227
- feats_extract_conf:
228
- n_fft: 2048
229
- hop_length: 512
230
- win_length: null
231
- normalize: null
232
- normalize_conf: {}
233
- tts: vits
234
- tts_conf:
235
- generator_type: vits_generator
236
- generator_params:
237
- hidden_channels: 192
238
- spks: -1
239
- global_channels: -1
240
- segment_size: 32
241
- text_encoder_attention_heads: 2
242
- text_encoder_ffn_expand: 4
243
- text_encoder_blocks: 6
244
- text_encoder_positionwise_layer_type: conv1d
245
- text_encoder_positionwise_conv_kernel_size: 3
246
- text_encoder_positional_encoding_layer_type: rel_pos
247
- text_encoder_self_attention_layer_type: rel_selfattn
248
- text_encoder_activation_type: swish
249
- text_encoder_normalize_before: true
250
- text_encoder_dropout_rate: 0.1
251
- text_encoder_positional_dropout_rate: 0.0
252
- text_encoder_attention_dropout_rate: 0.1
253
- use_macaron_style_in_text_encoder: true
254
- use_conformer_conv_in_text_encoder: false
255
- text_encoder_conformer_kernel_size: -1
256
- decoder_kernel_size: 7
257
- decoder_channels: 512
258
- decoder_upsample_scales:
259
- - 8
260
- - 8
261
- - 2
262
- - 2
263
- - 2
264
- decoder_upsample_kernel_sizes:
265
- - 16
266
- - 16
267
- - 4
268
- - 4
269
- - 4
270
- decoder_resblock_kernel_sizes:
271
- - 3
272
- - 7
273
- - 11
274
- decoder_resblock_dilations:
275
- - - 1
276
- - 3
277
- - 5
278
- - - 1
279
- - 3
280
- - 5
281
- - - 1
282
- - 3
283
- - 5
284
- use_weight_norm_in_decoder: true
285
- posterior_encoder_kernel_size: 5
286
- posterior_encoder_layers: 16
287
- posterior_encoder_stacks: 1
288
- posterior_encoder_base_dilation: 1
289
- posterior_encoder_dropout_rate: 0.0
290
- use_weight_norm_in_posterior_encoder: true
291
- flow_flows: 4
292
- flow_kernel_size: 5
293
- flow_base_dilation: 1
294
- flow_layers: 4
295
- flow_dropout_rate: 0.0
296
- use_weight_norm_in_flow: true
297
- use_only_mean_in_flow: true
298
- stochastic_duration_predictor_kernel_size: 3
299
- stochastic_duration_predictor_dropout_rate: 0.5
300
- stochastic_duration_predictor_flows: 4
301
- stochastic_duration_predictor_dds_conv_layers: 3
302
- vocabs: 85
303
- aux_channels: 1025
304
- discriminator_type: hifigan_multi_scale_multi_period_discriminator
305
- discriminator_params:
306
- scales: 1
307
- scale_downsample_pooling: AvgPool1d
308
- scale_downsample_pooling_params:
309
- kernel_size: 4
310
- stride: 2
311
- padding: 2
312
- scale_discriminator_params:
313
- in_channels: 1
314
- out_channels: 1
315
- kernel_sizes:
316
- - 15
317
- - 41
318
- - 5
319
- - 3
320
- channels: 128
321
- max_downsample_channels: 1024
322
- max_groups: 16
323
- bias: true
324
- downsample_scales:
325
- - 2
326
- - 2
327
- - 4
328
- - 4
329
- - 1
330
- nonlinear_activation: LeakyReLU
331
- nonlinear_activation_params:
332
- negative_slope: 0.1
333
- use_weight_norm: true
334
- use_spectral_norm: false
335
- follow_official_norm: false
336
- periods:
337
- - 2
338
- - 3
339
- - 5
340
- - 7
341
- - 11
342
- period_discriminator_params:
343
- in_channels: 1
344
- out_channels: 1
345
- kernel_sizes:
346
- - 5
347
- - 3
348
- channels: 32
349
- downsample_scales:
350
- - 3
351
- - 3
352
- - 3
353
- - 3
354
- - 1
355
- max_downsample_channels: 1024
356
- bias: true
357
- nonlinear_activation: LeakyReLU
358
- nonlinear_activation_params:
359
- negative_slope: 0.1
360
- use_weight_norm: true
361
- use_spectral_norm: false
362
- generator_adv_loss_params:
363
- average_by_discriminators: false
364
- loss_type: mse
365
- discriminator_adv_loss_params:
366
- average_by_discriminators: false
367
- loss_type: mse
368
- feat_match_loss_params:
369
- average_by_discriminators: false
370
- average_by_layers: false
371
- include_final_outputs: true
372
- mel_loss_params:
373
- fs: 44100
374
- n_fft: 2048
375
- hop_length: 512
376
- win_length: null
377
- window: hann
378
- n_mels: 80
379
- fmin: 0
380
- fmax: null
381
- log_base: null
382
- lambda_adv: 1.0
383
- lambda_mel: 45.0
384
- lambda_feat_match: 2.0
385
- lambda_dur: 1.0
386
- lambda_kl: 1.0
387
- sampling_rate: 44100
388
- cache_generator_outputs: true
389
- pitch_extract: null
390
- pitch_extract_conf: {}
391
- pitch_normalize: null
392
- pitch_normalize_conf: {}
393
- energy_extract: null
394
- energy_extract_conf: {}
395
- energy_normalize: null
396
- energy_normalize_conf: {}
397
- required:
398
- - output_dir
399
- - token_list
400
- version: '202301'
401
- distributed: false