Siddhant committed
Commit a0177e8
1 Parent(s): 20afc54

import from zenodo

Files changed (26)
  1. README.md +50 -0
  2. exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/config.yaml +394 -0
  3. exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/discriminator_backward_time.png +0 -0
  4. exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/discriminator_fake_loss.png +0 -0
  5. exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/discriminator_forward_time.png +0 -0
  6. exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/discriminator_loss.png +0 -0
  7. exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/discriminator_optim_step_time.png +0 -0
  8. exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/discriminator_real_loss.png +0 -0
  9. exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/discriminator_train_time.png +0 -0
  10. exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_adv_loss.png +0 -0
  11. exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_backward_time.png +0 -0
  12. exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_dur_loss.png +0 -0
  13. exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_feat_match_loss.png +0 -0
  14. exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_forward_time.png +0 -0
  15. exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_kl_loss.png +0 -0
  16. exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_loss.png +0 -0
  17. exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_mel_loss.png +0 -0
  18. exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_optim_step_time.png +0 -0
  19. exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_train_time.png +0 -0
  20. exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/gpu_max_cached_mem_GB.png +0 -0
  21. exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/iter_time.png +0 -0
  22. exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/optim0_lr0.png +0 -0
  23. exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/optim1_lr0.png +0 -0
  24. exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/train_time.png +0 -0
  25. exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/train.total_count.ave_10best.pth +3 -0
  26. meta.yaml +8 -0
README.md ADDED
@@ -0,0 +1,50 @@
+ ---
+ tags:
+ - espnet
+ - audio
+ - text-to-speech
+ language: ja
+ datasets:
+ - jsut
+ license: cc-by-4.0
+ ---
+ ## ESPnet2 TTS pretrained model
+ ### `kan-bayashi/jsut_full_band_vits_accent_with_pause`
+ ♻️ Imported from https://zenodo.org/record/5431984/
+
+ This model was trained by kan-bayashi using the jsut/tts1 recipe in [espnet](https://github.com/espnet/espnet/).
+ ### Demo: How to use in ESPnet2
+ ```python
+ # coming soon
+ ```
+ ### Citing ESPnet
+ ```BibTex
+ @inproceedings{watanabe2018espnet,
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson {Enrique Yalta Soplin} and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
+ year={2018},
+ booktitle={Proceedings of Interspeech},
+ pages={2207--2211},
+ doi={10.21437/Interspeech.2018-1456},
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
+ }
+ @inproceedings{hayashi2020espnet,
+ title={{Espnet-TTS}: Unified, reproducible, and integratable open source end-to-end text-to-speech toolkit},
+ author={Hayashi, Tomoki and Yamamoto, Ryuichi and Inoue, Katsuki and Yoshimura, Takenori and Watanabe, Shinji and Toda, Tomoki and Takeda, Kazuya and Zhang, Yu and Tan, Xu},
+ booktitle={Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
+ pages={7654--7658},
+ year={2020},
+ organization={IEEE}
+ }
+ ```
+ or arXiv:
+ ```bibtex
+ @misc{watanabe2018espnet,
+ title={ESPnet: End-to-End Speech Processing Toolkit},
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Enrique Yalta Soplin and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
+ year={2018},
+ eprint={1804.00015},
+ archivePrefix={arXiv},
+ primaryClass={cs.CL}
+ }
+ ```
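The demo block in the imported README is still a placeholder, so the following is a minimal sketch of how an ESPnet2 TTS model of this kind is typically loaded for synthesis. It assumes the `espnet`, `espnet_model_zoo`, `pyopenjtalk`, and `soundfile` packages are installed and that `Text2Speech.from_pretrained` can resolve the Hugging Face model id shown in the README; treat it as illustrative, not as the official demo.

```python
# Minimal sketch (not the official "coming soon" demo).
# Assumes espnet >= 0.10 with Text2Speech.from_pretrained, plus espnet_model_zoo,
# pyopenjtalk (for the pyopenjtalk_accent_with_pause g2p), and soundfile.
import soundfile as sf
from espnet2.bin.tts_inference import Text2Speech

# Model id taken from the README header; resolving it via from_pretrained is an assumption.
tts = Text2Speech.from_pretrained("kan-bayashi/jsut_full_band_vits_accent_with_pause")

# VITS is end-to-end, so the returned dict already contains a full-band waveform.
out = tts("こんにちは、世界。")
sf.write("out.wav", out["wav"].numpy(), tts.fs)
```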
exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/config.yaml ADDED
@@ -0,0 +1,394 @@
+ config: ./conf/tuning/train_full_band_vits.yaml
+ print_config: false
+ log_level: INFO
+ dry_run: false
+ iterator_type: sequence
+ output_dir: exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause
+ ngpu: 1
+ seed: 777
+ num_workers: 4
+ num_att_plot: 3
+ dist_backend: nccl
+ dist_init_method: env://
+ dist_world_size: 4
+ dist_rank: 0
+ local_rank: 0
+ dist_master_addr: localhost
+ dist_master_port: 60755
+ dist_launcher: null
+ multiprocessing_distributed: true
+ unused_parameters: true
+ sharded_ddp: false
+ cudnn_enabled: true
+ cudnn_benchmark: false
+ cudnn_deterministic: true
+ collect_stats: false
+ write_collected_feats: false
+ max_epoch: 1000
+ patience: null
+ val_scheduler_criterion:
+ - valid
+ - loss
+ early_stopping_criterion:
+ - valid
+ - loss
+ - min
+ best_model_criterion:
+ - - train
+ - total_count
+ - max
+ keep_nbest_models: 10
+ grad_clip: -1
+ grad_clip_type: 2.0
+ grad_noise: false
+ accum_grad: 1
+ no_forward_run: false
+ resume: true
+ train_dtype: float32
+ use_amp: false
+ log_interval: 50
+ use_tensorboard: true
+ use_wandb: false
+ wandb_project: null
+ wandb_id: null
+ wandb_entity: null
+ wandb_name: null
+ wandb_model_log_interval: -1
+ detect_anomaly: false
+ pretrain_path: null
+ init_param: []
+ ignore_init_mismatch: false
+ freeze_param: []
+ num_iters_per_epoch: 500
+ batch_size: 20
+ valid_batch_size: null
+ batch_bins: 10000000
+ valid_batch_bins: null
+ train_shape_file:
+ - exp/tts_stats_raw_44.1khz_phn_jaconv_pyopenjtalk_accent_with_pause/train/text_shape.phn
+ - exp/tts_stats_raw_44.1khz_phn_jaconv_pyopenjtalk_accent_with_pause/train/speech_shape
+ valid_shape_file:
+ - exp/tts_stats_raw_44.1khz_phn_jaconv_pyopenjtalk_accent_with_pause/valid/text_shape.phn
+ - exp/tts_stats_raw_44.1khz_phn_jaconv_pyopenjtalk_accent_with_pause/valid/speech_shape
+ batch_type: numel
+ valid_batch_type: null
+ fold_length:
+ - 150
+ - 409600
+ sort_in_batch: descending
+ sort_batch: descending
+ multiple_iterator: false
+ chunk_length: 500
+ chunk_shift_ratio: 0.5
+ num_cache_chunks: 1024
+ train_data_path_and_name_and_type:
+ - - dump/44.1k/raw/tr_no_dev/text
+ - text
+ - text
+ - - dump/44.1k/raw/tr_no_dev/wav.scp
+ - speech
+ - sound
+ valid_data_path_and_name_and_type:
+ - - dump/44.1k/raw/dev/text
+ - text
+ - text
+ - - dump/44.1k/raw/dev/wav.scp
+ - speech
+ - sound
+ allow_variable_data_keys: false
+ max_cache_size: 0.0
+ max_cache_fd: 32
+ valid_max_cache_size: null
+ optim: adamw
+ optim_conf:
+ lr: 0.0002
+ betas:
+ - 0.8
+ - 0.99
+ eps: 1.0e-09
+ weight_decay: 0.0
+ scheduler: exponentiallr
+ scheduler_conf:
+ gamma: 0.999875
+ optim2: adamw
+ optim2_conf:
+ lr: 0.0002
+ betas:
+ - 0.8
+ - 0.99
+ eps: 1.0e-09
+ weight_decay: 0.0
+ scheduler2: exponentiallr
+ scheduler2_conf:
+ gamma: 0.999875
+ generator_first: false
+ token_list:
+ - <blank>
+ - <unk>
+ - '1'
+ - '2'
+ - '0'
+ - '3'
+ - '4'
+ - '-1'
+ - '5'
+ - a
+ - o
+ - '-2'
+ - i
+ - '-3'
+ - u
+ - e
+ - k
+ - n
+ - t
+ - '6'
+ - r
+ - '-4'
+ - s
+ - N
+ - m
+ - pau
+ - '7'
+ - sh
+ - d
+ - g
+ - w
+ - '8'
+ - U
+ - '-5'
+ - I
+ - cl
+ - h
+ - y
+ - b
+ - '9'
+ - j
+ - ts
+ - ch
+ - '-6'
+ - z
+ - p
+ - '-7'
+ - f
+ - ky
+ - ry
+ - '-8'
+ - gy
+ - '-9'
+ - hy
+ - ny
+ - '-10'
+ - by
+ - my
+ - '-11'
+ - '-12'
+ - '-13'
+ - py
+ - '-14'
+ - '-15'
+ - v
+ - '10'
+ - '-16'
+ - '-17'
+ - '11'
+ - '-21'
+ - '-20'
+ - '12'
+ - '-19'
+ - '13'
+ - '-18'
+ - '14'
+ - dy
+ - '15'
+ - ty
+ - '-22'
+ - '16'
+ - '18'
+ - '19'
+ - '17'
+ - <sos/eos>
+ odim: null
+ model_conf: {}
+ use_preprocessor: true
+ token_type: phn
+ bpemodel: null
+ non_linguistic_symbols: null
+ cleaner: jaconv
+ g2p: pyopenjtalk_accent_with_pause
+ feats_extract: linear_spectrogram
+ feats_extract_conf:
+ n_fft: 2048
+ hop_length: 512
+ win_length: null
+ normalize: null
+ normalize_conf: {}
+ tts: vits
+ tts_conf:
+ generator_type: vits_generator
+ generator_params:
+ hidden_channels: 192
+ spks: -1
+ global_channels: -1
+ segment_size: 32
+ text_encoder_attention_heads: 2
+ text_encoder_ffn_expand: 4
+ text_encoder_blocks: 6
+ text_encoder_positionwise_layer_type: conv1d
+ text_encoder_positionwise_conv_kernel_size: 3
+ text_encoder_positional_encoding_layer_type: rel_pos
+ text_encoder_self_attention_layer_type: rel_selfattn
+ text_encoder_activation_type: swish
+ text_encoder_normalize_before: true
+ text_encoder_dropout_rate: 0.1
+ text_encoder_positional_dropout_rate: 0.0
+ text_encoder_attention_dropout_rate: 0.1
+ use_macaron_style_in_text_encoder: true
+ use_conformer_conv_in_text_encoder: false
+ text_encoder_conformer_kernel_size: -1
+ decoder_kernel_size: 7
+ decoder_channels: 512
+ decoder_upsample_scales:
+ - 8
+ - 8
+ - 2
+ - 2
+ - 2
+ decoder_upsample_kernel_sizes:
+ - 16
+ - 16
+ - 4
+ - 4
+ - 4
+ decoder_resblock_kernel_sizes:
+ - 3
+ - 7
+ - 11
+ decoder_resblock_dilations:
+ - - 1
+ - 3
+ - 5
+ - - 1
+ - 3
+ - 5
+ - - 1
+ - 3
+ - 5
+ use_weight_norm_in_decoder: true
+ posterior_encoder_kernel_size: 5
+ posterior_encoder_layers: 16
+ posterior_encoder_stacks: 1
+ posterior_encoder_base_dilation: 1
+ posterior_encoder_dropout_rate: 0.0
+ use_weight_norm_in_posterior_encoder: true
+ flow_flows: 4
+ flow_kernel_size: 5
+ flow_base_dilation: 1
+ flow_layers: 4
+ flow_dropout_rate: 0.0
+ use_weight_norm_in_flow: true
+ use_only_mean_in_flow: true
+ stochastic_duration_predictor_kernel_size: 3
+ stochastic_duration_predictor_dropout_rate: 0.5
+ stochastic_duration_predictor_flows: 4
+ stochastic_duration_predictor_dds_conv_layers: 3
+ vocabs: 85
+ aux_channels: 1025
+ discriminator_type: hifigan_multi_scale_multi_period_discriminator
+ discriminator_params:
+ scales: 1
+ scale_downsample_pooling: AvgPool1d
+ scale_downsample_pooling_params:
+ kernel_size: 4
+ stride: 2
+ padding: 2
+ scale_discriminator_params:
+ in_channels: 1
+ out_channels: 1
+ kernel_sizes:
+ - 15
+ - 41
+ - 5
+ - 3
+ channels: 128
+ max_downsample_channels: 1024
+ max_groups: 16
+ bias: true
+ downsample_scales:
+ - 2
+ - 2
+ - 4
+ - 4
+ - 1
+ nonlinear_activation: LeakyReLU
+ nonlinear_activation_params:
+ negative_slope: 0.1
+ use_weight_norm: true
+ use_spectral_norm: false
+ follow_official_norm: false
+ periods:
+ - 2
+ - 3
+ - 5
+ - 7
+ - 11
+ period_discriminator_params:
+ in_channels: 1
+ out_channels: 1
+ kernel_sizes:
+ - 5
+ - 3
+ channels: 32
+ downsample_scales:
+ - 3
+ - 3
+ - 3
+ - 3
+ - 1
+ max_downsample_channels: 1024
+ bias: true
+ nonlinear_activation: LeakyReLU
+ nonlinear_activation_params:
+ negative_slope: 0.1
+ use_weight_norm: true
+ use_spectral_norm: false
+ generator_adv_loss_params:
+ average_by_discriminators: false
+ loss_type: mse
+ discriminator_adv_loss_params:
+ average_by_discriminators: false
+ loss_type: mse
+ feat_match_loss_params:
+ average_by_discriminators: false
+ average_by_layers: false
+ include_final_outputs: true
+ mel_loss_params:
+ fs: 44100
+ n_fft: 2048
+ hop_length: 512
+ win_length: null
+ window: hann
+ n_mels: 80
+ fmin: 0
+ fmax: null
+ log_base: null
+ lambda_adv: 1.0
+ lambda_mel: 45.0
+ lambda_feat_match: 2.0
+ lambda_dur: 1.0
+ lambda_kl: 1.0
+ sampling_rate: 44100
+ cache_generator_outputs: true
+ pitch_extract: null
+ pitch_extract_conf: {}
+ pitch_normalize: null
+ pitch_normalize_conf: {}
+ energy_extract: null
+ energy_extract_conf: {}
+ energy_normalize: null
+ energy_normalize_conf: {}
+ required:
+ - output_dir
+ - token_list
+ version: 0.10.3a1
+ distributed: true
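A note on the `token_list` above: it mixes phoneme symbols with quoted integers and `pau` because the `pyopenjtalk_accent_with_pause` g2p emits accent-position numbers and pause markers alongside phonemes. A hedged sketch of how that token stream can be reproduced locally (assuming `pyopenjtalk` is installed and that this tokenizer class is available under these names in the installed espnet version):

```python
# Sketch only: regenerate the phoneme/accent token stream this config trains on.
# PhonemeTokenizer and the g2p_type name are assumed from espnet2 >= 0.10.
from espnet2.text.phoneme_tokenizer import PhonemeTokenizer

tokenizer = PhonemeTokenizer(g2p_type="pyopenjtalk_accent_with_pause")
tokens = tokenizer.text2tokens("こんにちは。")
print(tokens)  # phonemes interleaved with accent integers and 'pau' symbols, as in token_list
```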
exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/discriminator_backward_time.png ADDED
exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/discriminator_fake_loss.png ADDED
exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/discriminator_forward_time.png ADDED
exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/discriminator_loss.png ADDED
exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/discriminator_optim_step_time.png ADDED
exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/discriminator_real_loss.png ADDED
exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/discriminator_train_time.png ADDED
exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_adv_loss.png ADDED
exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_backward_time.png ADDED
exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_dur_loss.png ADDED
exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_feat_match_loss.png ADDED
exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_forward_time.png ADDED
exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_kl_loss.png ADDED
exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_loss.png ADDED
exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_mel_loss.png ADDED
exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_optim_step_time.png ADDED
exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_train_time.png ADDED
exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/gpu_max_cached_mem_GB.png ADDED
exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/iter_time.png ADDED
exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/optim0_lr0.png ADDED
exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/optim1_lr0.png ADDED
exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/train_time.png ADDED
exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/train.total_count.ave_10best.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fb20b30d97591a8827c00a9421b9d3a06a7f92c791655cb95b05a7bdbcef0f4b
+ size 373285008
meta.yaml ADDED
@@ -0,0 +1,8 @@
+ espnet: 0.10.3a1
+ files:
+ model_file: exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/train.total_count.ave_10best.pth
+ python: "3.7.3 (default, Mar 27 2019, 22:11:17) \n[GCC 7.3.0]"
+ timestamp: 1630720780.032757
+ torch: 1.7.1
+ yaml_files:
+ train_config: exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/config.yaml
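`meta.yaml` ties the packed model together: `files.model_file` points at the averaged checkpoint added above and `yaml_files.train_config` at its training config. As a hedged illustration of how those two paths are consumed, the sketch below builds a `Text2Speech` instance directly from the local files after cloning the repository with Git LFS; the argument names are believed to match `espnet2.bin.tts_inference.Text2Speech` but should be checked against the installed version.

```python
# Sketch only: load the checkpoint/config pair listed in meta.yaml from a local clone.
# Assumes the repo was cloned with Git LFS so the ~373 MB .pth is materialized,
# and that pyopenjtalk is installed for the configured g2p.
from espnet2.bin.tts_inference import Text2Speech

exp = "exp/tts_train_full_band_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause"
tts = Text2Speech(
    train_config=f"{exp}/config.yaml",                      # yaml_files.train_config
    model_file=f"{exp}/train.total_count.ave_10best.pth",   # files.model_file
    device="cpu",
)
wav = tts("ありがとうございました。")["wav"]  # 44.1 kHz output per sampling_rate in config.yaml
```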