andito HF staff committed on
Commit
6383c51
1 Parent(s): 8745348

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. TTS/melo_handler.py +3 -1
  2. audio_streaming_client.py +9 -3
  3. handler.py +1 -1
TTS/melo_handler.py CHANGED
@@ -33,7 +33,7 @@ class MeloTTSHandler(BaseHandler):
33
  def setup(
34
  self,
35
  should_listen,
36
- device="mps",
37
  language="en",
38
  speaker_to_id="en",
39
  gen_kwargs={}, # Unused
@@ -41,10 +41,12 @@ class MeloTTSHandler(BaseHandler):
41
  ):
42
  self.should_listen = should_listen
43
  self.device = device
 
44
  self.language = language
45
  self.model = TTS(
46
  language=WHISPER_LANGUAGE_TO_MELO_LANGUAGE[self.language], device=device
47
  )
 
48
  self.speaker_id = self.model.hps.data.spk2id[
49
  WHISPER_LANGUAGE_TO_MELO_SPEAKER[speaker_to_id]
50
  ]
 
33
  def setup(
34
  self,
35
  should_listen,
36
+ device="auto",
37
  language="en",
38
  speaker_to_id="en",
39
  gen_kwargs={}, # Unused
 
41
  ):
42
  self.should_listen = should_listen
43
  self.device = device
44
+ console.print(f"[green]Device: {device}")
45
  self.language = language
46
  self.model = TTS(
47
  language=WHISPER_LANGUAGE_TO_MELO_LANGUAGE[self.language], device=device
48
  )
49
+ console.print(f"[green]Model device: {self.model.device}")
50
  self.speaker_id = self.model.hps.data.spk2id[
51
  WHISPER_LANGUAGE_TO_MELO_SPEAKER[speaker_to_id]
52
  ]
audio_streaming_client.py CHANGED
@@ -57,9 +57,15 @@ class AudioStreamingClient:
57
  if self.session_state != "processing" and not self.send_queue.empty():
58
  chunk = self.send_queue.get().tobytes()
59
  buffer += chunk
60
- if len(buffer) >= self.args.chunk_size * 2: # * 2 because of int16
61
- self.send_request(buffer)
62
- buffer = b''
 
 
 
 
 
 
63
  else:
64
  self.send_request()
65
  time.sleep(0.1)
 
57
  if self.session_state != "processing" and not self.send_queue.empty():
58
  chunk = self.send_queue.get().tobytes()
59
  buffer += chunk
60
+
61
+ # Calculate energy of the audio chunk
62
+ energy = np.sum(np.square(np.frombuffer(chunk, dtype=np.int16))) / len(chunk)
63
+ print(f"Energy: {energy}")
64
+
65
+ if energy > 0.01: # Threshold for energy detection
66
+ if len(buffer) >= self.args.chunk_size * 2: # * 2 because of int16
67
+ self.send_request(buffer)
68
+ buffer = b''
69
  else:
70
  self.send_request()
71
  time.sleep(0.1)
handler.py CHANGED
@@ -23,7 +23,7 @@ class EndpointHandler:
23
  self.parler_tts_handler_kwargs,
24
  self.melo_tts_handler_kwargs,
25
  self.chat_tts_handler_kwargs,
26
- ) = get_default_arguments(mode='none', lm_model_name='meta-llama/Meta-Llama-3.1-8B-Instruct', tts='melo')
27
 
28
  setup_logger(self.module_kwargs.log_level)
29
 
 
23
  self.parler_tts_handler_kwargs,
24
  self.melo_tts_handler_kwargs,
25
  self.chat_tts_handler_kwargs,
26
+ ) = get_default_arguments(mode='none', log_level='DEBUG', lm_model_name='meta-llama/Meta-Llama-3.1-8B-Instruct', tts='melo', device='mps')
27
 
28
  setup_logger(self.module_kwargs.log_level)
29