Update infer_onnx.py
Browse files- infer_onnx.py +22 -14
infer_onnx.py
CHANGED
@@ -37,29 +37,36 @@ def process_text(i: int, text: str, device: torch.device, cleaner:str):
|
|
37 |
return x.numpy(), x_lengths.numpy()
|
38 |
|
39 |
# paths
|
40 |
-
MODEL_PATH_MATCHA_MEL_BAL="matcha_multispeaker_cat_bal_opset_15_10_steps.onnx"
|
41 |
-
MODEL_PATH_MATCHA_MEL_CAT="matcha_multispeaker_cat_cen_opset_15_10_steps.onnx"
|
42 |
-
MODEL_PATH_MATCHA_MEL_OCC="matcha_multispeaker_cat_occ_opset_15_10_steps.onnx"
|
43 |
-
MODEL_PATH_MATCHA_MEL_VAL="matcha_multispeaker_cat_val_opset_15_10_steps.onnx"
|
|
|
|
|
|
|
44 |
MODEL_PATH_VOCOS="mel_spec_22khz_cat.onnx"
|
45 |
CONFIG_PATH="config.yaml"
|
46 |
-
SPEAKER_ID_DICT="
|
47 |
|
48 |
# Load models
|
49 |
sess_options = onnxruntime.SessionOptions()
|
50 |
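-
model_matcha_mel_bal = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL_BAL), sess_options=sess_options, providers=["CPUExecutionProvider"])
|
51 |
-
model_matcha_mel_cat = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL_CAT), sess_options=sess_options, providers=["CPUExecutionProvider"])
|
52 |
-
model_matcha_mel_occ = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL_OCC), sess_options=sess_options, providers=["CPUExecutionProvider"])
|
53 |
-
model_matcha_mel_val = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL_VAL), sess_options=sess_options, providers=["CPUExecutionProvider"])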
|
|
|
|
|
|
|
|
|
54 |
model_vocos = onnxruntime.InferenceSession(str(MODEL_PATH_VOCOS), sess_options=sess_options, providers=["CPUExecutionProvider"])
|
55 |
|
56 |
speaker_id_dict = json.load(open(SPEAKER_ID_DICT))
|
57 |
accents = [e for e in speaker_id_dict.keys()]
|
58 |
|
59 |
-
models={"balear":model_matcha_mel_bal,
|
60 |
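-
"nord-occidental": model_matcha_mel_occ,
|
61 |
-
"valencia": model_matcha_mel_val,
|
62 |
-
"central": model_matcha_mel_cat}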
|
63 |
|
64 |
cleaners={"balear": "catalan_balear_cleaners",
|
65 |
"nord-occidental": "catalan_occidental_cleaners",
|
@@ -161,7 +168,8 @@ def tts(text:str, accent:str, spk_name:str, temperature:float, length_scale:floa
|
|
161 |
spk_id = speaker_id_dict[accent][spk_name]
|
162 |
sid = np.array([int(spk_id)]) if spk_id is not None else None
|
163 |
text_matcha , text_lengths = process_text(0,text,"cpu",cleaner=cleaners[accent])
|
164 |
-
model_matcha_mel = models[accent]
|
|
|
165 |
|
166 |
# MATCHA VOCOS
|
167 |
inputs = {
|
|
|
37 |
return x.numpy(), x_lengths.numpy()
|
38 |
|
39 |
# paths
|
40 |
+
# MODEL_PATH_MATCHA_MEL_BAL="matcha_multispeaker_cat_bal_opset_15_10_steps.onnx"
|
41 |
+
# MODEL_PATH_MATCHA_MEL_CAT="matcha_multispeaker_cat_cen_opset_15_10_steps.onnx"
|
42 |
+
# MODEL_PATH_MATCHA_MEL_OCC="matcha_multispeaker_cat_occ_opset_15_10_steps.onnx"
|
43 |
+
# MODEL_PATH_MATCHA_MEL_VAL="matcha_multispeaker_cat_val_opset_15_10_steps.onnx"
|
44 |
+
|
45 |
+
MODEL_PATH_MATCHA_MEL_ALL="matcha_multispeaker_cat_all_opset_15_10_steps.onnx"
|
46 |
+
|
47 |
MODEL_PATH_VOCOS="mel_spec_22khz_cat.onnx"
|
48 |
CONFIG_PATH="config.yaml"
|
49 |
+
SPEAKER_ID_DICT="spk_to_id_3.json"
|
50 |
|
51 |
# Load models
|
52 |
sess_options = onnxruntime.SessionOptions()
|
53 |
+
|
54 |
+
# model_matcha_mel_bal = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL_BAL), sess_options=sess_options, providers=["CPUExecutionProvider"])
|
55 |
+
# model_matcha_mel_cat = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL_CAT), sess_options=sess_options, providers=["CPUExecutionProvider"])
|
56 |
+
# model_matcha_mel_occ = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL_OCC), sess_options=sess_options, providers=["CPUExecutionProvider"])
|
57 |
+
# model_matcha_mel_val = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL_VAL), sess_options=sess_options, providers=["CPUExecutionProvider"])
|
58 |
+
|
59 |
+
model_matcha_mel_all = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL_ALL), sess_options=sess_options, providers=["CPUExecutionProvider"])
|
60 |
+
|
61 |
model_vocos = onnxruntime.InferenceSession(str(MODEL_PATH_VOCOS), sess_options=sess_options, providers=["CPUExecutionProvider"])
|
62 |
|
63 |
speaker_id_dict = json.load(open(SPEAKER_ID_DICT))
|
64 |
accents = [e for e in speaker_id_dict.keys()]
|
65 |
|
66 |
+
# models={"balear":model_matcha_mel_bal,
|
67 |
+
# "nord-occidental": model_matcha_mel_occ,
|
68 |
+
# "valencia": model_matcha_mel_val,
|
69 |
+
# "central": model_matcha_mel_cat}
|
70 |
|
71 |
cleaners={"balear": "catalan_balear_cleaners",
|
72 |
"nord-occidental": "catalan_occidental_cleaners",
|
|
|
168 |
spk_id = speaker_id_dict[accent][spk_name]
|
169 |
sid = np.array([int(spk_id)]) if spk_id is not None else None
|
170 |
text_matcha , text_lengths = process_text(0,text,"cpu",cleaner=cleaners[accent])
|
171 |
+
# model_matcha_mel = models[accent]
|
172 |
+
model_matcha_mel = model_matcha_mel_all
|
173 |
|
174 |
# MATCHA VOCOS
|
175 |
inputs = {
|