Kangarroar committed on
Commit
e9bd2c2
1 Parent(s): e22d040

Upload 2 files

Browse files
checkpoints/0109_hifigan_bigpopcs_hop128/config.yaml ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accumulate_grad_batches: 1
2
+ adam_b1: 0.8
3
+ adam_b2: 0.99
4
+ amp: false
5
+ audio_num_mel_bins: 80
6
+ audio_sample_rate: 24000
7
+ aux_context_window: 0
8
+ #base_config:
9
+ #- egs/egs_bases/singing/pwg.yaml
10
+ #- egs/egs_bases/tts/vocoder/hifigan.yaml
11
+ binarization_args:
12
+ reset_phone_dict: true
13
+ reset_word_dict: true
14
+ shuffle: false
15
+ trim_eos_bos: false
16
+ trim_sil: false
17
+ with_align: false
18
+ with_f0: true
19
+ with_f0cwt: false
20
+ with_linear: false
21
+ with_spk_embed: false
22
+ with_spk_id: true
23
+ with_txt: false
24
+ with_wav: true
25
+ with_word: false
26
+ binarizer_cls: data_gen.tts.singing.binarize.SingingBinarizer
27
+ binary_data_dir: data/binary/big_popcs_24k_hop128
28
+ check_val_every_n_epoch: 10
29
+ clip_grad_norm: 1
30
+ clip_grad_value: 0
31
+ datasets: []
32
+ debug: false
33
+ dec_ffn_kernel_size: 9
34
+ dec_layers: 4
35
+ dict_dir: ''
36
+ disc_start_steps: 40000
37
+ discriminator_grad_norm: 1
38
+ discriminator_optimizer_params:
39
+ eps: 1.0e-06
40
+ lr: 0.0002
41
+ weight_decay: 0.0
42
+ discriminator_params:
43
+ bias: true
44
+ conv_channels: 64
45
+ in_channels: 1
46
+ kernel_size: 3
47
+ layers: 10
48
+ nonlinear_activation: LeakyReLU
49
+ nonlinear_activation_params:
50
+ negative_slope: 0.2
51
+ out_channels: 1
52
+ use_weight_norm: true
53
+ discriminator_scheduler_params:
54
+ gamma: 0.999
55
+ step_size: 600
56
+ dropout: 0.1
57
+ ds_workers: 1
58
+ enc_ffn_kernel_size: 9
59
+ enc_layers: 4
60
+ endless_ds: true
61
+ ffn_act: gelu
62
+ ffn_padding: SAME
63
+ fft_size: 512
64
+ fmax: 12000
65
+ fmin: 30
66
+ frames_multiple: 1
67
+ gen_dir_name: ''
68
+ generator_grad_norm: 10
69
+ generator_optimizer_params:
70
+ eps: 1.0e-06
71
+ lr: 0.0002
72
+ weight_decay: 0.0
73
+ generator_params:
74
+ aux_channels: 80
75
+ dropout: 0.0
76
+ gate_channels: 128
77
+ in_channels: 1
78
+ kernel_size: 3
79
+ layers: 30
80
+ out_channels: 1
81
+ residual_channels: 64
82
+ skip_channels: 64
83
+ stacks: 3
84
+ upsample_net: ConvInUpsampleNetwork
85
+ upsample_params:
86
+ upsample_scales:
87
+ - 2
88
+ - 4
89
+ - 4
90
+ - 4
91
+ use_nsf: false
92
+ use_pitch_embed: true
93
+ use_weight_norm: true
94
+ generator_scheduler_params:
95
+ gamma: 0.999
96
+ step_size: 600
97
+ griffin_lim_iters: 60
98
+ hidden_size: 256
99
+ hop_size: 128
100
+ infer: false
101
+ lambda_adv: 1.0
102
+ lambda_cdisc: 4.0
103
+ lambda_energy: 0.0
104
+ lambda_f0: 0.0
105
+ lambda_mel: 5.0
106
+ lambda_mel_adv: 1.0
107
+ lambda_ph_dur: 0.0
108
+ lambda_sent_dur: 0.0
109
+ lambda_uv: 0.0
110
+ lambda_word_dur: 0.0
111
+ load_ckpt: ''
112
+ loud_norm: false
113
+ lr: 2.0
114
+ max_epochs: 1000
115
+ max_frames: 2400
116
+ max_input_tokens: 1550
117
+ max_samples: 8192
118
+ max_sentences: 20
119
+ max_tokens: 24000
120
+ max_updates: 3000000
121
+ max_valid_sentences: 1
122
+ max_valid_tokens: 60000
123
+ mel_loss: ssim:0.5|l1:0.5
124
+ mel_vmax: 1.5
125
+ mel_vmin: -6
126
+ min_frames: 0
127
+ min_level_db: -120
128
+ num_ckpt_keep: 3
129
+ num_heads: 2
130
+ num_mels: 80
131
+ num_sanity_val_steps: 5
132
+ num_spk: 100
133
+ num_test_samples: 0
134
+ num_valid_plots: 10
135
+ optimizer_adam_beta1: 0.9
136
+ optimizer_adam_beta2: 0.98
137
+ out_wav_norm: false
138
+ pitch_extractor: parselmouth
139
+ pitch_type: frame
140
+ pre_align_args:
141
+ allow_no_txt: false
142
+ denoise: false
143
+ sox_resample: true
144
+ sox_to_wav: false
145
+ trim_sil: false
146
+ txt_processor: zh
147
+ use_tone: false
148
+ pre_align_cls: data_gen.tts.singing.pre_align.SingingPreAlign
149
+ predictor_grad: 0.0
150
+ print_nan_grads: false
151
+ processed_data_dir: ''
152
+ profile_infer: false
153
+ raw_data_dir: ''
154
+ ref_level_db: 20
155
+ rename_tmux: true
156
+ rerun_gen: true
157
+ resblock: '1'
158
+ resblock_dilation_sizes:
159
+ - - 1
160
+ - 3
161
+ - 5
162
+ - - 1
163
+ - 3
164
+ - 5
165
+ - - 1
166
+ - 3
167
+ - 5
168
+ resblock_kernel_sizes:
169
+ - 3
170
+ - 7
171
+ - 11
172
+ resume_from_checkpoint: 0
173
+ save_best: true
174
+ save_codes: []
175
+ save_f0: true
176
+ save_gt: true
177
+ scheduler: rsqrt
178
+ seed: 1234
179
+ sort_by_len: true
180
+ stft_loss_params:
181
+ fft_sizes:
182
+ - 1024
183
+ - 2048
184
+ - 512
185
+ hop_sizes:
186
+ - 120
187
+ - 240
188
+ - 50
189
+ win_lengths:
190
+ - 600
191
+ - 1200
192
+ - 240
193
+ window: hann_window
194
+ task_cls: tasks.vocoder.hifigan.HifiGanTask
195
+ tb_log_interval: 100
196
+ test_ids: []
197
+ test_input_dir: ''
198
+ test_num: 50
199
+ test_prefixes: []
200
+ test_set_name: test
201
+ train_set_name: train
202
+ train_sets: ''
203
+ upsample_initial_channel: 512
204
+ upsample_kernel_sizes:
205
+ - 16
206
+ - 16
207
+ - 4
208
+ - 4
209
+ upsample_rates:
210
+ - 8
211
+ - 4
212
+ - 2
213
+ - 2
214
+ use_cdisc: false
215
+ use_cond_disc: false
216
+ use_fm_loss: false
217
+ use_gt_dur: true
218
+ use_gt_f0: true
219
+ use_mel_loss: true
220
+ use_ms_stft: false
221
+ use_pitch_embed: true
222
+ use_ref_enc: true
223
+ use_spec_disc: false
224
+ use_spk_embed: false
225
+ use_spk_id: false
226
+ use_split_spk_id: false
227
+ val_check_interval: 2000
228
+ valid_infer_interval: 10000
229
+ valid_monitor_key: val_loss
230
+ valid_monitor_mode: min
231
+ valid_set_name: valid
232
+ vocoder: pwg
233
+ vocoder_ckpt: ''
234
+ vocoder_denoise_c: 0.0
235
+ warmup_updates: 8000
236
+ weight_decay: 0
237
+ win_length: null
238
+ win_size: 512
239
+ window: hann
240
+ word_size: 3000
241
+ work_dir: checkpoints/0109_hifigan_bigpopcs_hop128
checkpoints/0109_hifigan_bigpopcs_hop128/model_ckpt_steps_1512000.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cb68f3ce0c46ba0a8b6d49718f1fffdf5bd7bcab769a986fd2fd129835cc1d1
3
+ size 55827436