andito
/

s2s

andito HF staff committed on Sep 20

Commit

c77f359

•

1 Parent(s): c342d0f

Upload folder using huggingface_hub

Files changed (2) hide show

audio_streaming_client.py CHANGED Viewed

@@ -10,7 +10,7 @@ from dataclasses import dataclass, field
 @dataclass
 class AudioStreamingClientArguments:
     sample_rate: int = field(default=16000, metadata={"help": "Audio sample rate in Hz. Default is 16000."})
-    chunk_size: int = field(default=512, metadata={"help": "The size of audio chunks in samples. Default is 1024."})
     api_url: str = field(default="https://yxfmjcvuzgi123sw.us-east-1.aws.endpoints.huggingface.cloud", metadata={"help": "The URL of the API endpoint."})
     auth_token: str = field(default="your_auth_token", metadata={"help": "Authentication token for the API."})
@@ -56,19 +56,14 @@ class AudioStreamingClient:
         while not self.stop_event.is_set():
             if self.session_state != "processing" and not self.send_queue.empty():
                 chunk = self.send_queue.get().tobytes()
-                buffer += chunk
-                # Calculate energy of the audio chunk
-                energy = np.sum(np.square(np.frombuffer(chunk, dtype=np.int16))) / len(chunk)
-                print(f"Energy: {energy}")
-                if energy > 0.01:  # Threshold for energy detection
-                    if len(buffer) >= self.args.chunk_size * 2:  # * 2 because of int16
-                        self.send_request(buffer)
-                        buffer = b''
             else:
                 self.send_request()
-                time.sleep(0.1)
     def send_request(self, audio_data=None):
         payload = {"input_type": "speech",

 @dataclass
 class AudioStreamingClientArguments:
     sample_rate: int = field(default=16000, metadata={"help": "Audio sample rate in Hz. Default is 16000."})
+    chunk_size: int = field(default=2048, metadata={"help": "The size of audio chunks in samples. Default is 1024."})
     api_url: str = field(default="https://yxfmjcvuzgi123sw.us-east-1.aws.endpoints.huggingface.cloud", metadata={"help": "The URL of the API endpoint."})
     auth_token: str = field(default="your_auth_token", metadata={"help": "Authentication token for the API."})
         while not self.stop_event.is_set():
             if self.session_state != "processing" and not self.send_queue.empty():
                 chunk = self.send_queue.get().tobytes()
+                buffer += chunk
+                if len(buffer) >= self.args.chunk_size * 2:  # * 2 because of int16
+                    self.send_request(buffer)
+                    buffer = b''
+                    time.sleep(4*self.args.chunk_size/self.args.sample_rate)
             else:
                 self.send_request()
+                time.sleep(4*self.args.chunk_size/self.args.sample_rate)
     def send_request(self, audio_data=None):
         payload = {"input_type": "speech",

baseHandler.py CHANGED Viewed

@@ -1,7 +1,9 @@
 from time import perf_counter
 import logging
 logger = logging.getLogger(__name__)
 class BaseHandler:
@@ -37,6 +39,7 @@ class BaseHandler:
             for output in self.process(input):
                 self._times.append(perf_counter() - start_time)
                 if self.last_time > self.min_time_to_debug:
                     logger.debug(f"{self.__class__.__name__}: {self.last_time: .3f} s")
                 self.queue_out.put(output)
                 start_time = perf_counter()

 from time import perf_counter
 import logging
+from rich.console import Console
 logger = logging.getLogger(__name__)
+console = Console()
 class BaseHandler:
             for output in self.process(input):
                 self._times.append(perf_counter() - start_time)
                 if self.last_time > self.min_time_to_debug:
+                    console.print(f"{self.__class__.__name__}: {self.last_time: .3f} s")
                     logger.debug(f"{self.__class__.__name__}: {self.last_time: .3f} s")
                 self.queue_out.put(output)
                 start_time = perf_counter()