Spaces:
Running
Running
Yurii Paniv
committed on
Commit
•
a9c23eb
1
Parent(s):
6449e88
Add speed control
Browse files
- app.py +9 -2
- ukrainian_tts/tts.py +11 -10
app.py
CHANGED
@@ -57,7 +57,7 @@ print(f"CUDA available? {is_available()}")
|
|
57 |
ukr_tts = TTS()
|
58 |
|
59 |
|
60 |
-
def tts(text: str, voice: str, stress: str):
|
61 |
print("============================")
|
62 |
print("Original text:", text)
|
63 |
print("Voice", voice)
|
@@ -87,7 +87,7 @@ def tts(text: str, voice: str, stress: str):
|
|
87 |
log_queue.put([text, speaker_name, stress_selected, str(datetime.utcnow())])
|
88 |
|
89 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
|
90 |
-
_, text = ukr_tts.tts(text, speaker_name, stress_selected, fp)
|
91 |
return fp.name, text
|
92 |
|
93 |
|
@@ -113,6 +113,13 @@ iface = gr.Interface(
|
|
113 |
choices=[option.value for option in StressOption],
|
114 |
value=StressOption.AutomaticStress.value,
|
115 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
],
|
117 |
outputs=[
|
118 |
gr.components.Audio(label="Output"),
|
|
|
57 |
ukr_tts = TTS()
|
58 |
|
59 |
|
60 |
+
def tts(text: str, voice: str, stress: str, speed: float):
|
61 |
print("============================")
|
62 |
print("Original text:", text)
|
63 |
print("Voice", voice)
|
|
|
87 |
log_queue.put([text, speaker_name, stress_selected, str(datetime.utcnow())])
|
88 |
|
89 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
|
90 |
+
_, text = ukr_tts.tts(text, speaker_name, stress_selected, fp, speed)
|
91 |
return fp.name, text
|
92 |
|
93 |
|
|
|
113 |
choices=[option.value for option in StressOption],
|
114 |
value=StressOption.AutomaticStress.value,
|
115 |
),
|
116 |
+
gr.components.Slider(
|
117 |
+
label="Швидкість",
|
118 |
+
minimum=0.5,
|
119 |
+
maximum=2,
|
120 |
+
value=1,
|
121 |
+
step=0.1
|
122 |
+
)
|
123 |
],
|
124 |
outputs=[
|
125 |
gr.components.Audio(label="Output"),
|
ukrainian_tts/tts.py
CHANGED
@@ -67,19 +67,11 @@ class TTS:
|
|
67 |
text = preprocess_text(text, stress)
|
68 |
text = sentence_to_stress(text, stress_with_model if stress else stress_dict)
|
69 |
|
70 |
-
|
71 |
-
train_config="config.yaml",
|
72 |
-
model_file="model.pth",
|
73 |
-
device=self.device,
|
74 |
-
speed_control_alpha=1 / speed,
|
75 |
-
# Only for VITS
|
76 |
-
noise_scale=0.333,
|
77 |
-
noise_scale_dur=0.333,
|
78 |
-
)
|
79 |
# synthesis
|
80 |
with no_grad():
|
81 |
start = time.time()
|
82 |
-
wav = self.synthesizer(text, sids=np.array(voice))["wav"]
|
83 |
|
84 |
rtf = (time.time() - start) / (len(wav) / self.synthesizer.fs)
|
85 |
print(f"RTF = {rtf:5f}")
|
@@ -112,6 +104,15 @@ class TTS:
|
|
112 |
self.__download(model_link, model_path)
|
113 |
self.__download(config_link, config_path)
|
114 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
def __download(self, url, file_name):
|
116 |
"""Downloads file from `url` into local `file_name` file."""
|
117 |
if not exists(file_name):
|
|
|
67 |
text = preprocess_text(text, stress)
|
68 |
text = sentence_to_stress(text, stress_with_model if stress else stress_dict)
|
69 |
|
70 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
# synthesis
|
72 |
with no_grad():
|
73 |
start = time.time()
|
74 |
+
wav = self.synthesizer(text, sids=np.array(voice), decode_conf={"alpha": 1/speed})["wav"]
|
75 |
|
76 |
rtf = (time.time() - start) / (len(wav) / self.synthesizer.fs)
|
77 |
print(f"RTF = {rtf:5f}")
|
|
|
104 |
self.__download(model_link, model_path)
|
105 |
self.__download(config_link, config_path)
|
106 |
|
107 |
+
self.synthesizer = Text2Speech(
|
108 |
+
train_config="config.yaml",
|
109 |
+
model_file="model.pth",
|
110 |
+
device=self.device,
|
111 |
+
# Only for VITS
|
112 |
+
noise_scale=0.333,
|
113 |
+
noise_scale_dur=0.333,
|
114 |
+
)
|
115 |
+
|
116 |
def __download(self, url, file_name):
|
117 |
"""Downloads file from `url` into local `file_name` file."""
|
118 |
if not exists(file_name):
|