Upload folder using huggingface_hub
Browse files
- TTS/melo_handler.py +3 -1
- audio_streaming_client.py +9 -3
- handler.py +1 -1
TTS/melo_handler.py
CHANGED
@@ -33,7 +33,7 @@ class MeloTTSHandler(BaseHandler):
|
|
33 |
def setup(
|
34 |
self,
|
35 |
should_listen,
|
36 |
-
device="
|
37 |
language="en",
|
38 |
speaker_to_id="en",
|
39 |
gen_kwargs={}, # Unused
|
@@ -41,10 +41,12 @@ class MeloTTSHandler(BaseHandler):
|
|
41 |
):
|
42 |
self.should_listen = should_listen
|
43 |
self.device = device
|
|
|
44 |
self.language = language
|
45 |
self.model = TTS(
|
46 |
language=WHISPER_LANGUAGE_TO_MELO_LANGUAGE[self.language], device=device
|
47 |
)
|
|
|
48 |
self.speaker_id = self.model.hps.data.spk2id[
|
49 |
WHISPER_LANGUAGE_TO_MELO_SPEAKER[speaker_to_id]
|
50 |
]
|
|
|
33 |
def setup(
|
34 |
self,
|
35 |
should_listen,
|
36 |
+
device="auto",
|
37 |
language="en",
|
38 |
speaker_to_id="en",
|
39 |
gen_kwargs={}, # Unused
|
|
|
41 |
):
|
42 |
self.should_listen = should_listen
|
43 |
self.device = device
|
44 |
+
console.print(f"[green]Device: {device}")
|
45 |
self.language = language
|
46 |
self.model = TTS(
|
47 |
language=WHISPER_LANGUAGE_TO_MELO_LANGUAGE[self.language], device=device
|
48 |
)
|
49 |
+
console.print(f"[green]Model device: {self.model.device}")
|
50 |
self.speaker_id = self.model.hps.data.spk2id[
|
51 |
WHISPER_LANGUAGE_TO_MELO_SPEAKER[speaker_to_id]
|
52 |
]
|
audio_streaming_client.py
CHANGED
@@ -57,9 +57,15 @@ class AudioStreamingClient:
|
|
57 |
if self.session_state != "processing" and not self.send_queue.empty():
|
58 |
chunk = self.send_queue.get().tobytes()
|
59 |
buffer += chunk
|
60 |
-
|
61 |
-
|
62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
else:
|
64 |
self.send_request()
|
65 |
time.sleep(0.1)
|
|
|
57 |
if self.session_state != "processing" and not self.send_queue.empty():
|
58 |
chunk = self.send_queue.get().tobytes()
|
59 |
buffer += chunk
|
60 |
+
|
61 |
+
# Calculate energy of the audio chunk
|
62 |
+
energy = np.sum(np.square(np.frombuffer(chunk, dtype=np.int16))) / len(chunk)
|
63 |
+
print(f"Energy: {energy}")
|
64 |
+
|
65 |
+
if energy > 0.01: # Threshold for energy detection
|
66 |
+
if len(buffer) >= self.args.chunk_size * 2: # * 2 because of int16
|
67 |
+
self.send_request(buffer)
|
68 |
+
buffer = b''
|
69 |
else:
|
70 |
self.send_request()
|
71 |
time.sleep(0.1)
|
handler.py
CHANGED
@@ -23,7 +23,7 @@ class EndpointHandler:
|
|
23 |
self.parler_tts_handler_kwargs,
|
24 |
self.melo_tts_handler_kwargs,
|
25 |
self.chat_tts_handler_kwargs,
|
26 |
-
) = get_default_arguments(mode='none', lm_model_name='meta-llama/Meta-Llama-3.1-8B-Instruct', tts='melo')
|
27 |
|
28 |
setup_logger(self.module_kwargs.log_level)
|
29 |
|
|
|
23 |
self.parler_tts_handler_kwargs,
|
24 |
self.melo_tts_handler_kwargs,
|
25 |
self.chat_tts_handler_kwargs,
|
26 |
+
) = get_default_arguments(mode='none', log_level='DEBUG', lm_model_name='meta-llama/Meta-Llama-3.1-8B-Instruct', tts='melo', device='mps')
|
27 |
|
28 |
setup_logger(self.module_kwargs.log_level)
|
29 |
|