jhj0517 committed on
Commit
fd67237
1 Parent(s): c603464

add `whisper_type` arg

Browse files
Files changed (2) hide show
  1. app.py +20 -11
  2. user-start-webui.bat +5 -2
app.py CHANGED
@@ -15,17 +15,26 @@ class App:
15
  def __init__(self, args):
16
  self.args = args
17
  self.app = gr.Blocks(css=CSS, theme=self.args.theme)
18
- self.whisper_inf = WhisperInference() if self.args.disable_faster_whisper else FasterWhisperInference()
19
- if isinstance(self.whisper_inf, FasterWhisperInference):
20
- self.whisper_inf.model_dir = args.faster_whisper_model_dir
21
- print("Use Faster Whisper implementation")
22
- else:
23
- self.whisper_inf.model_dir = args.whisper_model_dir
24
- print("Use Open AI Whisper implementation")
25
  print(f"Device \"{self.whisper_inf.device}\" is detected")
26
  self.nllb_inf = NLLBInference()
27
  self.deepl_api = DeepLAPI()
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  @staticmethod
30
  def open_folder(folder_path: str):
31
  if os.path.exists(folder_path):
@@ -60,7 +69,7 @@ class App:
60
  cb_translate = gr.Checkbox(value=False, label="Translate to English?", interactive=True)
61
  with gr.Row():
62
  cb_timestamp = gr.Checkbox(value=True, label="Add a timestamp to the end of the filename", interactive=True)
63
- with gr.Accordion("VAD Options", open=False, visible=not self.args.disable_faster_whisper):
64
  cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
65
  sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold", value=0.5)
66
  nb_min_speech_duration_ms = gr.Number(label="Minimum Speech Duration (ms)", precision=0, value=250)
@@ -134,7 +143,7 @@ class App:
134
  with gr.Row():
135
  cb_timestamp = gr.Checkbox(value=True, label="Add a timestamp to the end of the filename",
136
  interactive=True)
137
- with gr.Accordion("VAD Options", open=False, visible=not self.args.disable_faster_whisper):
138
  cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
139
  sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold", value=0.5)
140
  nb_min_speech_duration_ms = gr.Number(label="Minimum Speech Duration (ms)", precision=0, value=250)
@@ -200,7 +209,7 @@ class App:
200
  dd_file_format = gr.Dropdown(["SRT", "WebVTT", "txt"], value="SRT", label="File Format")
201
  with gr.Row():
202
  cb_translate = gr.Checkbox(value=False, label="Translate to English?", interactive=True)
203
- with gr.Accordion("VAD Options", open=False, visible=not self.args.disable_faster_whisper):
204
  cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
205
  sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold", value=0.5)
206
  nb_min_speech_duration_ms = gr.Number(label="Minimum Speech Duration (ms)", precision=0, value=250)
@@ -331,7 +340,7 @@ class App:
331
 
332
  # Create the parser for command-line arguments
333
  parser = argparse.ArgumentParser()
334
- parser.add_argument('--disable_faster_whisper', type=bool, default=False, nargs='?', const=True, help='Disable the faster_whisper implementation. faster_whipser is implemented by https://github.com/guillaumekln/faster-whisper')
335
  parser.add_argument('--share', type=bool, default=False, nargs='?', const=True, help='Gradio share value')
336
  parser.add_argument('--server_name', type=str, default=None, help='Gradio server host')
337
  parser.add_argument('--server_port', type=int, default=None, help='Gradio server port')
 
15
  def __init__(self, args):
16
  self.args = args
17
  self.app = gr.Blocks(css=CSS, theme=self.args.theme)
18
+ self.whisper_inf = self.init_whisper()
19
+ print(f"Use \"{self.args.whisper_type}\" implementation")
 
 
 
 
 
20
  print(f"Device \"{self.whisper_inf.device}\" is detected")
21
  self.nllb_inf = NLLBInference()
22
  self.deepl_api = DeepLAPI()
23
 
24
def init_whisper(self):
    """Create the Whisper inference backend selected by ``--whisper_type``.

    The value of ``self.args.whisper_type`` is lower-cased and stripped
    before comparison. ``"whisper"`` selects ``WhisperInference`` and points
    its ``model_dir`` at ``self.args.whisper_model_dir``. Every other value,
    including ``"faster_whisper"`` / ``"faster-whisper"`` and any
    unrecognized string, selects ``FasterWhisperInference`` and points its
    ``model_dir`` at ``self.args.faster_whisper_model_dir``.

    Returns:
        The constructed inference object with ``model_dir`` already set.
    """
    whisper_type = self.args.whisper_type.lower().strip()

    # The previous version used two independent ``if`` statements, so a
    # faster-whisper selection built the backend in the first branch and
    # then again in the trailing ``else``. A single two-way branch builds
    # exactly one backend and keeps the same fallback for unknown values.
    if whisper_type == "whisper":
        whisper_inf = WhisperInference()
        whisper_inf.model_dir = self.args.whisper_model_dir
    else:
        whisper_inf = FasterWhisperInference()
        whisper_inf.model_dir = self.args.faster_whisper_model_dir
    return whisper_inf
37
+
38
  @staticmethod
39
  def open_folder(folder_path: str):
40
  if os.path.exists(folder_path):
 
69
  cb_translate = gr.Checkbox(value=False, label="Translate to English?", interactive=True)
70
  with gr.Row():
71
  cb_timestamp = gr.Checkbox(value=True, label="Add a timestamp to the end of the filename", interactive=True)
72
+ with gr.Accordion("VAD Options", open=False, visible=isinstance(self.whisper_inf, FasterWhisperInference)):
73
  cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
74
  sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold", value=0.5)
75
  nb_min_speech_duration_ms = gr.Number(label="Minimum Speech Duration (ms)", precision=0, value=250)
 
143
  with gr.Row():
144
  cb_timestamp = gr.Checkbox(value=True, label="Add a timestamp to the end of the filename",
145
  interactive=True)
146
+ with gr.Accordion("VAD Options", open=False, visible=isinstance(self.whisper_inf, FasterWhisperInference)):
147
  cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
148
  sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold", value=0.5)
149
  nb_min_speech_duration_ms = gr.Number(label="Minimum Speech Duration (ms)", precision=0, value=250)
 
209
  dd_file_format = gr.Dropdown(["SRT", "WebVTT", "txt"], value="SRT", label="File Format")
210
  with gr.Row():
211
  cb_translate = gr.Checkbox(value=False, label="Translate to English?", interactive=True)
212
+ with gr.Accordion("VAD Options", open=False, visible=isinstance(self.whisper_inf, FasterWhisperInference)):
213
  cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
214
  sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold", value=0.5)
215
  nb_min_speech_duration_ms = gr.Number(label="Minimum Speech Duration (ms)", precision=0, value=250)
 
340
 
341
  # Create the parser for command-line arguments
342
  parser = argparse.ArgumentParser()
343
+ parser.add_argument('--whisper_type', type=str, default="faster-whisper", help='A type of the whisper implementation between: ["whisper", "faster-whisper"]')
344
  parser.add_argument('--share', type=bool, default=False, nargs='?', const=True, help='Gradio share value')
345
  parser.add_argument('--server_name', type=str, default=None, help='Gradio server host')
346
  parser.add_argument('--server_port', type=int, default=None, help='Gradio server port')
user-start-webui.bat CHANGED
@@ -8,8 +8,8 @@ set USERNAME=
8
  set PASSWORD=
9
  set SHARE=
10
  set THEME=
11
- set DISABLE_FASTER_WHISPER=
12
  set API_OPEN=
 
13
  set WHISPER_MODEL_DIR=
14
  set FASTER_WHISPER_MODEL_DIR=
15
 
@@ -38,6 +38,9 @@ if /I "%DISABLE_FASTER_WHISPER%"=="true" (
38
  if /I "%API_OPEN%"=="true" (
39
  set API_OPEN=--api_open
40
  )
 
 
 
41
  if not "%WHISPER_MODEL_DIR%"=="" (
42
  set WHISPER_MODEL_DIR_ARG=--whisper_model_dir "%WHISPER_MODEL_DIR%"
43
  )
@@ -46,5 +49,5 @@ if not "%FASTER_WHISPER_MODEL_DIR%"=="" (
46
  )
47
 
48
  :: Call the original .bat script with optional arguments
49
- start-webui.bat %SERVER_NAME_ARG% %SERVER_PORT_ARG% %USERNAME_ARG% %PASSWORD_ARG% %SHARE_ARG% %THEME_ARG% %DISABLE_FASTER_WHISPER_ARG% %API_OPEN% %WHISPER_MODEL_DIR_ARG% %FASTER_WHISPER_MODEL_DIR_ARG%
50
  pause
 
8
  set PASSWORD=
9
  set SHARE=
10
  set THEME=
 
11
  set API_OPEN=
12
+ set WHISPER_TYPE=
13
  set WHISPER_MODEL_DIR=
14
  set FASTER_WHISPER_MODEL_DIR=
15
 
 
38
  if /I "%API_OPEN%"=="true" (
39
  set API_OPEN=--api_open
40
  )
41
+ if not "%WHISPER_TYPE%"=="" (
42
+ set WHISPER_TYPE_ARG=--whisper_type %WHISPER_TYPE%
43
+ )
44
  if not "%WHISPER_MODEL_DIR%"=="" (
45
  set WHISPER_MODEL_DIR_ARG=--whisper_model_dir "%WHISPER_MODEL_DIR%"
46
  )
 
49
  )
50
 
51
  :: Call the original .bat script with optional arguments
52
+ start-webui.bat %SERVER_NAME_ARG% %SERVER_PORT_ARG% %USERNAME_ARG% %PASSWORD_ARG% %SHARE_ARG% %THEME_ARG% %API_OPEN% %WHISPER_TYPE_ARG% %WHISPER_MODEL_DIR_ARG% %FASTER_WHISPER_MODEL_DIR_ARG%
53
  pause