seamless-streaming

Running on T4

App Files Files Community

Anna Sun committed on Dec 1, 2023

Commit

1143e8d

•

1 Parent(s): 070b677

add dual non-expr/expressive agent, install sc from github

Browse files

Files changed (9) hide show

Dockerfile +5 -0
seamless_server/models/Seamless/vad_s2st_sc_24khz_main.yaml +25 -0
seamless_server/models/SeamlessStreaming/{vad_s2st_sc_24khz_main.yaml → vad_s2st_sc_main.yaml} +0 -0
seamless_server/requirements.txt +2 -1
seamless_server/src/simuleval_agent_directory.py +29 -8
seamless_server/src/simuleval_transcoder.py +7 -2
seamless_server/whl/seamless_communication-1.0.0-py3-none-any.whl +2 -2
streaming-react-app/src/StreamingInterface.tsx +27 -0
streaming-react-app/src/types/StreamingTypes.ts +1 -0

Dockerfile CHANGED Viewed

@@ -71,6 +71,11 @@ RUN cd seamless_server && \
 COPY --from=frontend /app/dist ./streaming-react-app/dist
 WORKDIR $HOME/app/seamless_server
 USER root
 RUN ln -s /usr/lib/x86_64-linux-gnu/libsox.so.3 /usr/lib/x86_64-linux-gnu/libsox.so
 USER user

 COPY --from=frontend /app/dist ./streaming-react-app/dist
 WORKDIR $HOME/app/seamless_server
+# Fetch the gated expressive vocoder checkpoint at build time using the HF_TOKEN build secret.
+# The token is passed directly to `download` rather than via `huggingface-cli login`,
+# because `login` saves the token to disk and would bake it into this image layer.
+RUN --mount=type=secret,id=HF_TOKEN,mode=0444,required=true \
+    huggingface-cli download meta-private/SeamlessExpressive pretssel_melhifigan_wm-final.pt \
+        --token "$(cat /run/secrets/HF_TOKEN)" --local-dir ./models/Seamless/ && \
+    ln -s "$(readlink -f models/Seamless/pretssel_melhifigan_wm-final.pt)" models/Seamless/pretssel_melhifigan_wm.pt
 USER root
 RUN ln -s /usr/lib/x86_64-linux-gnu/libsox.so.3 /usr/lib/x86_64-linux-gnu/libsox.so
 USER user

seamless_server/models/Seamless/vad_s2st_sc_24khz_main.yaml ADDED Viewed

	@@ -0,0 +1,25 @@

+# Agent config for the dual-vocoder S2ST agent named in `agent_class`.
+# Each key must appear only once: duplicate mapping keys are invalid YAML.
+agent_class: seamless_communication.streaming.agents.seamless_s2st.SeamlessS2STDualVocoderVADAgent
+monotonic_decoder_model_name: seamless_streaming_monotonic_decoder
+unity_model_name: seamless_streaming_unity
+sentencepiece_model: spm_256k_nllb100.model
+task: s2st
+tgt_lang: "eng"
+min_unit_chunk_size: 50
+decision_threshold: 0.7
+no_early_stop: True
+block_ngrams: True
+vocoder_name: vocoder_v2
+expr_vocoder_name: vocoder_pretssel
+gated_model_dir: .
+expr_vocoder_gain: 3.0
+upstream_idx: 1
+wav2vec_yaml: wav2vec.yaml
+min_starting_wait_w2vbert: 192
+config_yaml: cfg_fbank_u2t.yaml
+detokenize_only: True
+device: cuda:0
+max_len_a: 0
+max_len_b: 1000

seamless_server/models/SeamlessStreaming/{vad_s2st_sc_24khz_main.yaml → vad_s2st_sc_main.yaml} RENAMED Viewed

File without changes

seamless_server/requirements.txt CHANGED Viewed

@@ -1,5 +1,6 @@
 # seamless_communication
-./whl/seamless_communication-1.0.0-py3-none-any.whl
 Flask==2.1.3
 Flask_Sockets==0.2.1
 g2p_en==2.1.0

 # seamless_communication
+git+https://github.com/facebookresearch/seamless_communication.git
+# ./whl/seamless_communication-1.0.0-py3-none-any.whl
 Flask==2.1.3
 Flask_Sockets==0.2.1
 g2p_en==2.1.0

seamless_server/src/simuleval_agent_directory.py CHANGED Viewed

@@ -1,6 +1,7 @@
 # Creates a directory in which to look up available agents
-from typing import List
 from src.simuleval_transcoder import SimulevalTranscoder
 import json
 import logging
@@ -33,8 +34,10 @@ class AgentWithInfo:
         # Supported dynamic params are defined in StreamingTypes.ts
         dynamic_params: List[str] = [],
         description="",
     ):
         self.agent = agent
         self.name = name
         self.description = description
         self.modalities = modalities
@@ -75,6 +78,7 @@ class AgentWithInfo:
 class SimulevalAgentDirectory:
     # Available models. These are the directories where the models can be found, and also serve as an ID for the model.
     seamless_streaming_agent = "SeamlessStreaming"
     def __init__(self):
         self.agents = []
@@ -96,7 +100,12 @@ class SimulevalAgentDirectory:
                     model_id,
                 )
         except Exception as e:
             logger.warning("Failed to build agent %s: %s" % (model_id, e))
             raise e
         return agent
@@ -110,20 +119,32 @@ class SimulevalAgentDirectory:
             for agent_info in agent_infos:
                 self.add_agent(agent_info)
         else:
-            s2s_m4t_expr_agent = self.build_agent_if_available(
-                SimulevalAgentDirectory.seamless_streaming_agent,
-                config_name="vad_s2st_sc_24khz_main.yaml",
-            )
-            if s2s_m4t_expr_agent:
                 self.add_agent(
                     AgentWithInfo(
-                        agent=s2s_m4t_expr_agent,
                         name=SimulevalAgentDirectory.seamless_streaming_agent,
                         modalities=["s2t", "s2s"],
                         target_langs=M4T_P0_LANGS,
                         dynamic_params=["expressive"],
                         description="multilingual expressive model that supports S2S and S2T",
                     )
                 )
@@ -137,7 +158,7 @@ class SimulevalAgentDirectory:
     def get_agent(self, name):
         for agent in self.agents:
             if agent.name == name:
-                return agent.agent
         return None
     def get_agent_or_throw(self, name):

 # Creates a directory in which to look up available agents
+import os
+from typing import List, Optional
 from src.simuleval_transcoder import SimulevalTranscoder
 import json
 import logging
         # Supported dynamic params are defined in StreamingTypes.ts
         dynamic_params: List[str] = [],
         description="",
+        has_expressive: Optional[bool] = None,
     ):
         self.agent = agent
+        self.has_expressive = has_expressive
         self.name = name
         self.description = description
         self.modalities = modalities
 class SimulevalAgentDirectory:
     # Available models. These are the directories where the models can be found, and also serve as an ID for the model.
     seamless_streaming_agent = "SeamlessStreaming"
+    seamless_agent = "Seamless"
     def __init__(self):
         self.agents = []
                     model_id,
                 )
         except Exception as e:
+            from fairseq2.assets.error import AssetError
             logger.warning("Failed to build agent %s: %s" % (model_id, e))
+            if isinstance(e, AssetError):
+                logger.warning(
+                    "Please download gated assets and set `gated_model_dir` in the config"
+                )
             raise e
         return agent
             for agent_info in agent_infos:
                 self.add_agent(agent_info)
         else:
+            s2s_agent = None
+            if os.environ.get("USE_EXPRESSIVE_MODEL"):
+                logger.info("Building expressive model...")
+                s2s_agent = self.build_agent_if_available(
+                    SimulevalAgentDirectory.seamless_agent,
+                    config_name="vad_s2st_sc_24khz_main.yaml",
+                )
+                has_expressive = True
+            else:
+                logger.info("Building non-expressive model...")
+                s2s_agent = self.build_agent_if_available(
+                    SimulevalAgentDirectory.seamless_streaming_agent,
+                    config_name="vad_s2st_sc_main.yaml",
+                )
+                has_expressive = False
+            if s2s_agent:
                 self.add_agent(
                     AgentWithInfo(
+                        agent=s2s_agent,
                         name=SimulevalAgentDirectory.seamless_streaming_agent,
                         modalities=["s2t", "s2s"],
                         target_langs=M4T_P0_LANGS,
                         dynamic_params=["expressive"],
                         description="multilingual expressive model that supports S2S and S2T",
+                        has_expressive=has_expressive,
                     )
                 )
     def get_agent(self, name):
         for agent in self.agents:
             if agent.name == name:
+                return agent
         return None
     def get_agent_or_throw(self, name):

seamless_server/src/simuleval_transcoder.py CHANGED Viewed

@@ -119,7 +119,8 @@ class OutputSegments:
 class SimulevalTranscoder:
     def __init__(self, agent, sample_rate, debug, buffer_limit):
-        self.agent = agent
         self.input_queue = asyncio.Queue()
         self.output_queue = asyncio.Queue()
         self.states = self.agent.build_states()
@@ -185,7 +186,7 @@ class SimulevalTranscoder:
             logger.info(*args)
     @classmethod
-    def build_agent(cls, model_path, config_name="vad_s2st_main.yaml"):
         logger.info(f"Building simuleval agent: {model_path}, {config_name}")
         agent = build_system_from_dir(
             Path(__file__).resolve().parent.parent / f"models/{model_path}",
@@ -208,6 +209,10 @@ class SimulevalTranscoder:
             tgt_lang=dynamic_config.get("targetLanguage"),
             config=dynamic_config,
         )
         # # segment is array([0, 0, 0, ..., 0, 0, 0], dtype=int16)
         self.input_queue.put_nowait(segment)

 class SimulevalTranscoder:
     def __init__(self, agent, sample_rate, debug, buffer_limit):
+        self.agent = agent.agent
+        self.has_expressive = agent.has_expressive
         self.input_queue = asyncio.Queue()
         self.output_queue = asyncio.Queue()
         self.states = self.agent.build_states()
             logger.info(*args)
     @classmethod
+    def build_agent(cls, model_path, config_name):
         logger.info(f"Building simuleval agent: {model_path}, {config_name}")
         agent = build_system_from_dir(
             Path(__file__).resolve().parent.parent / f"models/{model_path}",
             tgt_lang=dynamic_config.get("targetLanguage"),
             config=dynamic_config,
         )
+        if dynamic_config.get("expressive") is True and self.has_expressive is False:
+            logger.warning(
+                "Passing 'expressive' but the agent does not support expressive output!"
+            )
         # # segment is array([0, 0, 0, ..., 0, 0, 0], dtype=int16)
         self.input_queue.put_nowait(segment)

seamless_server/whl/seamless_communication-1.0.0-py3-none-any.whl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1df10e0c85ee0ffbc9f2e1bf8896850a52c551383df0332a94d26d9d39770c85
-size 201552

 version https://git-lfs.github.com/spec/v1
+oid sha256:d5b81add4d9917ac562c2e8a10bd5b3c88804b8bd94c56cef4e9a01ecde4a839
+size 204321

streaming-react-app/src/StreamingInterface.tsx CHANGED Viewed

@@ -165,6 +165,9 @@ export default function StreamingInterface() {
   // Dynamic Params:
   const [targetLang, setTargetLang] = useState<string | null>(null);
   const [serverDebugFlag, setServerDebugFlag] = useState<boolean>(
     debugParam ?? false,
@@ -246,6 +249,7 @@ export default function StreamingInterface() {
       setAgent((prevAgent) => {
         if (prevAgent?.name !== newAgent?.name) {
           setTargetLang(newAgent?.targetLangs[0] ?? null);
         }
         return newAgent;
       });
@@ -421,6 +425,7 @@ export default function StreamingInterface() {
       // available before actually configuring and starting the stream
       const fullDynamicConfig: DynamicConfig = {
         targetLanguage: targetLang,
       };
       await onSetDynamicConfig(fullDynamicConfig);
@@ -906,6 +911,28 @@ export default function StreamingInterface() {
                           spacing={1}
                           alignItems="flex-start"
                           sx={{flexGrow: 1}}>
                           {isListener && (
                             <Box
                               sx={{

   // Dynamic Params:
   const [targetLang, setTargetLang] = useState<string | null>(null);
+  const [enableExpressive, setEnableExpressive] = useState<boolean | null>(
+    null,
+  );
   const [serverDebugFlag, setServerDebugFlag] = useState<boolean>(
     debugParam ?? false,
       setAgent((prevAgent) => {
         if (prevAgent?.name !== newAgent?.name) {
           setTargetLang(newAgent?.targetLangs[0] ?? null);
+          setEnableExpressive(null);
         }
         return newAgent;
       });
       // available before actually configuring and starting the stream
       const fullDynamicConfig: DynamicConfig = {
         targetLanguage: targetLang,
+        expressive: enableExpressive,
       };
       await onSetDynamicConfig(fullDynamicConfig);
                           spacing={1}
                           alignItems="flex-start"
                           sx={{flexGrow: 1}}>
+                          {currentAgent?.dynamicParams?.includes(
+                            'expressive',
+                          ) && (
+                            <FormControlLabel
+                              control={
+                                <Switch
+                                  checked={enableExpressive ?? false}
+                                  onChange={(
+                                    event: React.ChangeEvent<HTMLInputElement>,
+                                  ) => {
+                                    const newValue = event.target.checked;
+                                    setEnableExpressive(newValue);
+                                    onSetDynamicConfig({
+                                      expressive: newValue,
+                                    });
+                                  }}
+                                />
+                              }
+                              label="Expressive"
+                            />
+                          )}
                           {isListener && (
                             <Box
                               sx={{

streaming-react-app/src/types/StreamingTypes.ts CHANGED Viewed

@@ -113,6 +113,7 @@ export type TranslationSentences = Array<string>;
 export type DynamicConfig = {
   // targetLanguage: a 3-letter string representing the desired output language.
   targetLanguage: string;
 };
 export type PartialDynamicConfig = Partial<DynamicConfig>;

 export type DynamicConfig = {
   // targetLanguage: a 3-letter string representing the desired output language.
   targetLanguage: string;
+  expressive: boolean | null;
 };
 export type PartialDynamicConfig = Partial<DynamicConfig>;