update description
infer_onnx.py +7 -7

infer_onnx.py CHANGED
@@ -37,6 +37,7 @@ model_matcha_mel= onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL), sess_
 model_vocos = onnxruntime.InferenceSession(str(MODEL_PATH_VOCOS), sess_options=sess_options, providers=["CPUExecutionProvider"])
 model_matcha = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA), sess_options=sess_options, providers=["CPUExecutionProvider"])
 
+
 def vocos_inference(mel):
 
     with open(CONFIG_PATH, "r") as f:
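For context on this hunk: vocos_inference() takes a mel spectrogram produced by the Matcha model and pushes it through the Vocos ONNX session. The snippet below is a minimal, self-contained sketch of that onnxruntime call pattern only; the helper name, model path, input shape, and the assumption of a single waveform output are hypothetical and not taken from this repo (the real vocos_inference also reads CONFIG_PATH for its post-processing).

import numpy as np
import onnxruntime

def run_vocoder(onnx_path: str, mel: np.ndarray) -> np.ndarray:
    # Hypothetical helper, not from the repo: load a vocoder ONNX model and
    # run it on a (batch, n_mels, frames) mel spectrogram.
    sess_options = onnxruntime.SessionOptions()
    session = onnxruntime.InferenceSession(
        onnx_path, sess_options=sess_options, providers=["CPUExecutionProvider"]
    )
    input_name = session.get_inputs()[0].name        # avoid hard-coding the input name
    outputs = session.run(None, {input_name: mel.astype(np.float32)})
    return outputs[0]                                # assuming the first output is the audio

# Example usage (requires an actual vocoder .onnx file on disk):
# audio = run_vocoder("vocos.onnx", np.zeros((1, 80, 200), dtype=np.float32))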
@@ -88,6 +89,7 @@ def vocos_inference(mel):
 
     return y
 
+
 def tts(text:str, spk_id:int):
     sid = np.array([int(spk_id)]) if spk_id is not None else None
     text_matcha , text_lengths = process_text(0,text,"cpu")
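The tts() function in this hunk is the glue between the two sessions: encode the text, run the Matcha acoustic model to get a mel spectrogram, then vocode it. The sketch below only mirrors that call pattern with dummy stand-ins; every stand-in name and shape is made up for illustration, and the repo's process_text() behaviour is not reproduced here.

import numpy as np

def fake_process_text(text: str):
    # Stand-in for the repo's process_text(): returns token ids and their length.
    ids = np.array([[ord(c) % 100 for c in text]], dtype=np.int64)
    return ids, np.array([ids.shape[1]], dtype=np.int64)

def fake_acoustic_model(ids, lengths, sid):
    # Stand-in for the Matcha ONNX session (sid unused here): dummy (1, 80, T) mel.
    return np.zeros((1, 80, int(lengths[0]) * 5), dtype=np.float32)

def fake_vocoder(mel):
    # Stand-in for vocos_inference(): dummy waveform, ~256 samples per frame.
    return np.zeros((1, mel.shape[-1] * 256), dtype=np.float32)

def tts(text: str, spk_id: int):
    sid = np.array([int(spk_id)]) if spk_id is not None else None
    ids, lengths = fake_process_text(text)
    mel = fake_acoustic_model(ids, lengths, sid)
    return fake_vocoder(mel)

print(tts("hola món", 0).shape)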
@@ -129,20 +131,18 @@ title = """
     <div
       style="display: inline-flex; align-items: center; gap: 0.8rem; font-size: 1.75rem;"
     > <h1 style="font-weight: 900; margin-bottom: 7px; line-height: normal;">
-      TTS
+      TTS Vocoder Comparison
     </h1> </div>
 </div>
 """
 
 description = """
-
-training and inference efficiency and naturalness by introducing adversarial learning into the duration predictor. The transformer
-block was added to the normalizing flows to capture the long-term dependency when transforming the distribution.
-The synthesis quality was improved by incorporating Gaussian noise into the alignment search.
-
+
 🍵 Matcha-TTS, a new approach to non-autoregressive neural TTS, that uses conditional flow matching (similar to rectified flows) to speed up ODE-based speech synthesis
 
-
+For vocoders we use the Hifigan universal version and Vocos trained on a Catalan set of ~28 hours.
+
+Matcha was trained using the openslr69 and festcat datasets.
 """
 
 article = "Training and demo by BSC."
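The new description's claim about conditional flow matching can be made concrete: the model learns a vector field, and synthesis integrates an ODE from Gaussian noise toward a mel spectrogram in a small number of solver steps. The toy Euler loop below only illustrates that idea; the vector field is a made-up stand-in, not Matcha-TTS's network.

import numpy as np

def fake_vector_field(x, t):
    # Stand-in for the learned network v(x, t | text); here it just pulls x toward zero.
    return -x * t

def euler_sample(shape, n_steps=10, rng=None):
    if rng is None:
        rng = np.random.default_rng(0)
    x = rng.standard_normal(shape).astype(np.float32)   # start from Gaussian noise at t=0
    dt = 1.0 / n_steps
    for i in range(n_steps):
        t = i * dt
        x = x + dt * fake_vector_field(x, t)             # one Euler step of dx/dt = v(x, t)
    return x

mel = euler_sample((1, 80, 200))
print(mel.shape)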
|