jhj0517 committed on
Commit
e3a6426
1 Parent(s): 3e8d967

Add best_of and patience parameters

Browse files
app.py CHANGED
@@ -63,6 +63,8 @@ class App:
63
  nb_log_prob_threshold = gr.Number(label="Log Probability Threshold", value=-1.0, interactive=True)
64
  nb_no_speech_threshold = gr.Number(label="No Speech Threshold", value=0.6, interactive=True)
65
  dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
 
 
66
  with gr.Row():
67
  btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
68
  with gr.Row():
@@ -77,7 +79,9 @@ class App:
77
  beam_size=nb_beam_size,
78
  log_prob_threshold=nb_log_prob_threshold,
79
  no_speech_threshold=nb_no_speech_threshold,
80
- compute_type=dd_compute_type)
 
 
81
  btn_run.click(fn=self.whisper_inf.transcribe_file,
82
  inputs=params + whisper_params.to_list(),
83
  outputs=[tb_indicator, files_subtitles])
@@ -109,6 +113,8 @@ class App:
109
  nb_log_prob_threshold = gr.Number(label="Log Probability Threshold", value=-1.0, interactive=True)
110
  nb_no_speech_threshold = gr.Number(label="No Speech Threshold", value=0.6, interactive=True)
111
  dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
 
 
112
  with gr.Row():
113
  btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
114
  with gr.Row():
@@ -123,7 +129,9 @@ class App:
123
  beam_size=nb_beam_size,
124
  log_prob_threshold=nb_log_prob_threshold,
125
  no_speech_threshold=nb_no_speech_threshold,
126
- compute_type=dd_compute_type)
 
 
127
  btn_run.click(fn=self.whisper_inf.transcribe_youtube,
128
  inputs=params + whisper_params.to_list(),
129
  outputs=[tb_indicator, files_subtitles])
@@ -148,6 +156,8 @@ class App:
148
  nb_log_prob_threshold = gr.Number(label="Log Probability Threshold", value=-1.0, interactive=True)
149
  nb_no_speech_threshold = gr.Number(label="No Speech Threshold", value=0.6, interactive=True)
150
  dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
 
 
151
  with gr.Row():
152
  btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
153
  with gr.Row():
@@ -162,7 +172,9 @@ class App:
162
  beam_size=nb_beam_size,
163
  log_prob_threshold=nb_log_prob_threshold,
164
  no_speech_threshold=nb_no_speech_threshold,
165
- compute_type=dd_compute_type)
 
 
166
  btn_run.click(fn=self.whisper_inf.transcribe_mic,
167
  inputs=params + whisper_params.to_list(),
168
  outputs=[tb_indicator, files_subtitles])
 
63
  nb_log_prob_threshold = gr.Number(label="Log Probability Threshold", value=-1.0, interactive=True)
64
  nb_no_speech_threshold = gr.Number(label="No Speech Threshold", value=0.6, interactive=True)
65
  dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
66
+ nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)
67
+ nb_patience = gr.Number(label="Patience", value=1, interactive=True)
68
  with gr.Row():
69
  btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
70
  with gr.Row():
 
79
  beam_size=nb_beam_size,
80
  log_prob_threshold=nb_log_prob_threshold,
81
  no_speech_threshold=nb_no_speech_threshold,
82
+ compute_type=dd_compute_type,
83
+ best_of=nb_best_of,
84
+ patience=nb_patience)
85
  btn_run.click(fn=self.whisper_inf.transcribe_file,
86
  inputs=params + whisper_params.to_list(),
87
  outputs=[tb_indicator, files_subtitles])
 
113
  nb_log_prob_threshold = gr.Number(label="Log Probability Threshold", value=-1.0, interactive=True)
114
  nb_no_speech_threshold = gr.Number(label="No Speech Threshold", value=0.6, interactive=True)
115
  dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
116
+ nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)
117
+ nb_patience = gr.Number(label="Patience", value=1, interactive=True)
118
  with gr.Row():
119
  btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
120
  with gr.Row():
 
129
  beam_size=nb_beam_size,
130
  log_prob_threshold=nb_log_prob_threshold,
131
  no_speech_threshold=nb_no_speech_threshold,
132
+ compute_type=dd_compute_type,
133
+ best_of=nb_best_of,
134
+ patience=nb_patience)
135
  btn_run.click(fn=self.whisper_inf.transcribe_youtube,
136
  inputs=params + whisper_params.to_list(),
137
  outputs=[tb_indicator, files_subtitles])
 
156
  nb_log_prob_threshold = gr.Number(label="Log Probability Threshold", value=-1.0, interactive=True)
157
  nb_no_speech_threshold = gr.Number(label="No Speech Threshold", value=0.6, interactive=True)
158
  dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
159
+ nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)
160
+ nb_patience = gr.Number(label="Patience", value=1, interactive=True)
161
  with gr.Row():
162
  btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
163
  with gr.Row():
 
172
  beam_size=nb_beam_size,
173
  log_prob_threshold=nb_log_prob_threshold,
174
  no_speech_threshold=nb_no_speech_threshold,
175
+ compute_type=dd_compute_type,
176
+ best_of=nb_best_of,
177
+ patience=nb_patience)
178
  btn_run.click(fn=self.whisper_inf.transcribe_mic,
179
  inputs=params + whisper_params.to_list(),
180
  outputs=[tb_indicator, files_subtitles])
modules/faster_whisper_inference.py CHANGED
@@ -264,6 +264,8 @@ class FasterWhisperInference(BaseInterface):
264
  beam_size=params.beam_size,
265
  log_prob_threshold=params.log_prob_threshold,
266
  no_speech_threshold=params.no_speech_threshold,
 
 
267
  )
268
  progress(0, desc="Loading audio..")
269
 
 
264
  beam_size=params.beam_size,
265
  log_prob_threshold=params.log_prob_threshold,
266
  no_speech_threshold=params.no_speech_threshold,
267
+ best_of=params.best_of,
268
+ patience=params.patience
269
  )
270
  progress(0, desc="Loading audio..")
271
 
modules/whisper_Inference.py CHANGED
@@ -255,6 +255,8 @@ class WhisperInference(BaseInterface):
255
  no_speech_threshold=params.no_speech_threshold,
256
  task="translate" if params.is_translate and self.current_model_size in self.translatable_model else "transcribe",
257
  fp16=True if params.compute_type == "float16" else False,
 
 
258
  progress_callback=progress_callback)["segments"]
259
  elapsed_time = time.time() - start_time
260
 
 
255
  no_speech_threshold=params.no_speech_threshold,
256
  task="translate" if params.is_translate and self.current_model_size in self.translatable_model else "transcribe",
257
  fp16=True if params.compute_type == "float16" else False,
258
+ best_of=params.best_of,
259
+ patience=params.patience,
260
  progress_callback=progress_callback)["segments"]
261
  elapsed_time = time.time() - start_time
262
 
modules/whisper_data_class.py CHANGED
@@ -11,6 +11,8 @@ class WhisperGradioComponents:
11
  log_prob_threshold: gr.Number
12
  no_speech_threshold: gr.Number
13
  compute_type: gr.Dropdown
 
 
14
  """
15
  A data class to pass Gradio components to the function before Gradio pre-processing.
16
  See this documentation for more information about Gradio pre-processing: https://www.gradio.app/docs/components
@@ -35,6 +37,10 @@ class WhisperGradioComponents:
35
  compute_type: gr.Dropdown
36
  compute type for transcription.
37
  see more info : https://opennmt.net/CTranslate2/quantization.html
 
 
 
 
38
  """
39
 
40
  def to_list(self) -> list:
@@ -74,6 +80,8 @@ class WhisperValues:
74
  log_prob_threshold: float
75
  no_speech_threshold: float
76
  compute_type: str
 
 
77
  """
78
  A data class to use Whisper parameters in the function after Gradio pre-processing.
79
  See this documentation for more information about Gradio pre-processing: : https://www.gradio.app/docs/components
 
11
  log_prob_threshold: gr.Number
12
  no_speech_threshold: gr.Number
13
  compute_type: gr.Dropdown
14
+ best_of: gr.Number
15
+ patience: gr.Number
16
  """
17
  A data class to pass Gradio components to the function before Gradio pre-processing.
18
  See this documentation for more information about Gradio pre-processing: https://www.gradio.app/docs/components
 
37
  compute_type: gr.Dropdown
38
  compute type for transcription.
39
  see more info : https://opennmt.net/CTranslate2/quantization.html
40
+ best_of: gr.Number
41
+ Number of candidates when sampling with non-zero temperature.
42
+ patience: gr.Number
43
+ Beam search patience factor.
44
  """
45
 
46
  def to_list(self) -> list:
 
80
  log_prob_threshold: float
81
  no_speech_threshold: float
82
  compute_type: str
83
+ best_of: int
84
+ patience: float
85
  """
86
  A data class to use Whisper parameters in the function after Gradio pre-processing.
87
  See this documentation for more information about Gradio pre-processing: : https://www.gradio.app/docs/components