Yurii Paniv committed on
Commit
a9c23eb
1 Parent(s): 6449e88

Add speed control

Browse files
Files changed (2) hide show
  1. app.py +9 -2
  2. ukrainian_tts/tts.py +11 -10
app.py CHANGED
@@ -57,7 +57,7 @@ print(f"CUDA available? {is_available()}")
57
  ukr_tts = TTS()
58
 
59
 
60
- def tts(text: str, voice: str, stress: str):
61
  print("============================")
62
  print("Original text:", text)
63
  print("Voice", voice)
@@ -87,7 +87,7 @@ def tts(text: str, voice: str, stress: str):
87
  log_queue.put([text, speaker_name, stress_selected, str(datetime.utcnow())])
88
 
89
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
90
- _, text = ukr_tts.tts(text, speaker_name, stress_selected, fp)
91
  return fp.name, text
92
 
93
 
@@ -113,6 +113,13 @@ iface = gr.Interface(
113
  choices=[option.value for option in StressOption],
114
  value=StressOption.AutomaticStress.value,
115
  ),
 
 
 
 
 
 
 
116
  ],
117
  outputs=[
118
  gr.components.Audio(label="Output"),
 
57
  ukr_tts = TTS()
58
 
59
 
60
+ def tts(text: str, voice: str, stress: str, speed: float):
61
  print("============================")
62
  print("Original text:", text)
63
  print("Voice", voice)
 
87
  log_queue.put([text, speaker_name, stress_selected, str(datetime.utcnow())])
88
 
89
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
90
+ _, text = ukr_tts.tts(text, speaker_name, stress_selected, fp, speed)
91
  return fp.name, text
92
 
93
 
 
113
  choices=[option.value for option in StressOption],
114
  value=StressOption.AutomaticStress.value,
115
  ),
116
+ gr.components.Slider(
117
+ label="Швидкість",
118
+ minimum=0.5,
119
+ maximum=2,
120
+ value=1,
121
+ step=0.1
122
+ )
123
  ],
124
  outputs=[
125
  gr.components.Audio(label="Output"),
ukrainian_tts/tts.py CHANGED
@@ -67,19 +67,11 @@ class TTS:
67
  text = preprocess_text(text, stress)
68
  text = sentence_to_stress(text, stress_with_model if stress else stress_dict)
69
 
70
- self.synthesizer = Text2Speech(
71
- train_config="config.yaml",
72
- model_file="model.pth",
73
- device=self.device,
74
- speed_control_alpha=1 / speed,
75
- # Only for VITS
76
- noise_scale=0.333,
77
- noise_scale_dur=0.333,
78
- )
79
  # synthesis
80
  with no_grad():
81
  start = time.time()
82
- wav = self.synthesizer(text, sids=np.array(voice))["wav"]
83
 
84
  rtf = (time.time() - start) / (len(wav) / self.synthesizer.fs)
85
  print(f"RTF = {rtf:5f}")
@@ -112,6 +104,15 @@ class TTS:
112
  self.__download(model_link, model_path)
113
  self.__download(config_link, config_path)
114
 
 
 
 
 
 
 
 
 
 
115
  def __download(self, url, file_name):
116
  """Downloads file from `url` into local `file_name` file."""
117
  if not exists(file_name):
 
67
  text = preprocess_text(text, stress)
68
  text = sentence_to_stress(text, stress_with_model if stress else stress_dict)
69
 
70
+
 
 
 
 
 
 
 
 
71
  # synthesis
72
  with no_grad():
73
  start = time.time()
74
+ wav = self.synthesizer(text, sids=np.array(voice), decode_conf={"alpha": 1/speed})["wav"]
75
 
76
  rtf = (time.time() - start) / (len(wav) / self.synthesizer.fs)
77
  print(f"RTF = {rtf:5f}")
 
104
  self.__download(model_link, model_path)
105
  self.__download(config_link, config_path)
106
 
107
+ self.synthesizer = Text2Speech(
108
+ train_config="config.yaml",
109
+ model_file="model.pth",
110
+ device=self.device,
111
+ # Only for VITS
112
+ noise_scale=0.333,
113
+ noise_scale_dur=0.333,
114
+ )
115
+
116
  def __download(self, url, file_name):
117
  """Downloads file from `url` into local `file_name` file."""
118
  if not exists(file_name):