wetdog committed on
Commit
bbb1375
•
1 Parent(s): d3127d4

update description

Browse files
Files changed (1) hide show
  1. infer_onnx.py +7 -7
infer_onnx.py CHANGED
@@ -37,6 +37,7 @@ model_matcha_mel= onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL), sess_
37
  model_vocos = onnxruntime.InferenceSession(str(MODEL_PATH_VOCOS), sess_options=sess_options, providers=["CPUExecutionProvider"])
38
  model_matcha = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA), sess_options=sess_options, providers=["CPUExecutionProvider"])
39
 
 
40
  def vocos_inference(mel):
41
 
42
  with open(CONFIG_PATH, "r") as f:
@@ -88,6 +89,7 @@ def vocos_inference(mel):
88
 
89
  return y
90
 
 
91
  def tts(text:str, spk_id:int):
92
  sid = np.array([int(spk_id)]) if spk_id is not None else None
93
  text_matcha , text_lengths = process_text(0,text,"cpu")
@@ -129,20 +131,18 @@ title = """
129
  <div
130
  style="display: inline-flex; align-items: center; gap: 0.8rem; font-size: 1.75rem;"
131
  > <h1 style="font-weight: 900; margin-bottom: 7px; line-height: normal;">
132
- TTS Catalan Comparison
133
  </h1> </div>
134
  </div>
135
  """
136
 
137
  description = """
138
- VITS2 is an end-to-end speech synthesis model that predicts a speech waveform conditional on an input text sequence. VITS2 improved the
139
- training and inference efficiency and naturalness by introducing adversarial learning into the duration predictor. The transformer
140
- block was added to the normalizing flows to capture the long-term dependency when transforming the distribution.
141
- The synthesis quality was improved by incorporating Gaussian noise into the alignment search.
142
-
143
  🍡 Matcha-TTS, a new approach to non-autoregressive neural TTS, that uses conditional flow matching (similar to rectified flows) to speed up ODE-based speech synthesis
144
 
145
- Models are being trained in openslr69 and festcat datasets
 
 
146
  """
147
 
148
  article = "Training and demo by BSC."
 
37
  model_vocos = onnxruntime.InferenceSession(str(MODEL_PATH_VOCOS), sess_options=sess_options, providers=["CPUExecutionProvider"])
38
  model_matcha = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA), sess_options=sess_options, providers=["CPUExecutionProvider"])
39
 
40
+
41
  def vocos_inference(mel):
42
 
43
  with open(CONFIG_PATH, "r") as f:
 
89
 
90
  return y
91
 
92
+
93
  def tts(text:str, spk_id:int):
94
  sid = np.array([int(spk_id)]) if spk_id is not None else None
95
  text_matcha , text_lengths = process_text(0,text,"cpu")
 
131
  <div
132
  style="display: inline-flex; align-items: center; gap: 0.8rem; font-size: 1.75rem;"
133
  > <h1 style="font-weight: 900; margin-bottom: 7px; line-height: normal;">
134
+ TTS Vocoder Comparison
135
  </h1> </div>
136
  </div>
137
  """
138
 
139
  description = """
140
+
 
 
 
 
141
  🍡 Matcha-TTS, a new approach to non-autoregressive neural TTS, that uses conditional flow matching (similar to rectified flows) to speed up ODE-based speech synthesis
142
 
143
+ For vocoders we use Hifigan universal version and Vocos trained in a catalan set of ~28 hours.
144
+
145
+ Matcha was trained using openslr69 and festcat datasets
146
  """
147
 
148
  article = "Training and demo by BSC."