jhj0517 committed
Commit f02937a • 2 Parent(s): daf4d68 1f9180b

Merge pull request #76 from jhj0517/colab-button

app.py CHANGED
@@ -44,7 +44,7 @@ class App:
         with gr.Tabs():
             with gr.TabItem("File"):  # tab1
                 with gr.Row():
-                    input_file = gr.Files(type="file", label="Upload File here")
+                    input_file = gr.Files(type="filepath", label="Upload File here")
                 with gr.Row():
                     dd_model = gr.Dropdown(choices=self.whisper_inf.available_models, value="large-v3",
                                            label="Model")
@@ -63,14 +63,15 @@ class App:
                 with gr.Row():
                     btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
                 with gr.Row():
-                    tb_indicator = gr.Textbox(label="Output", scale=8)
-                    btn_openfolder = gr.Button('📂', scale=2)
+                    tb_indicator = gr.Textbox(label="Output", scale=4)
+                    files_subtitles = gr.Files(label="Downloadable output file", scale=4, interactive=False)
+                    btn_openfolder = gr.Button('📂', scale=1)

                 params = [input_file, dd_model, dd_lang, dd_file_format, cb_translate, cb_timestamp]
                 advanced_params = [nb_beam_size, nb_log_prob_threshold, nb_no_speech_threshold, dd_compute_type]
                 btn_run.click(fn=self.whisper_inf.transcribe_file,
                               inputs=params + advanced_params,
-                              outputs=[tb_indicator])
+                              outputs=[tb_indicator, files_subtitles])
                 btn_openfolder.click(fn=lambda: self.open_folder("outputs"), inputs=None, outputs=None)
                 dd_model.change(fn=self.on_change_models, inputs=[dd_model], outputs=[cb_translate])

@@ -102,14 +103,15 @@ class App:
                 with gr.Row():
                     btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
                 with gr.Row():
-                    tb_indicator = gr.Textbox(label="Output", scale=8)
-                    btn_openfolder = gr.Button('📂', scale=2)
+                    tb_indicator = gr.Textbox(label="Output", scale=4)
+                    files_subtitles = gr.Files(label="Downloadable output file", scale=4)
+                    btn_openfolder = gr.Button('📂', scale=1)

                 params = [tb_youtubelink, dd_model, dd_lang, dd_file_format, cb_translate, cb_timestamp]
                 advanced_params = [nb_beam_size, nb_log_prob_threshold, nb_no_speech_threshold, dd_compute_type]
                 btn_run.click(fn=self.whisper_inf.transcribe_youtube,
                               inputs=params + advanced_params,
-                              outputs=[tb_indicator])
+                              outputs=[tb_indicator, files_subtitles])
                 tb_youtubelink.change(get_ytmetas, inputs=[tb_youtubelink],
                                       outputs=[img_thumbnail, tb_title, tb_description])
                 btn_openfolder.click(fn=lambda: self.open_folder("outputs"), inputs=None, outputs=None)
@@ -134,20 +136,21 @@ class App:
                 with gr.Row():
                     btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
                 with gr.Row():
-                    tb_indicator = gr.Textbox(label="Output", scale=8)
-                    btn_openfolder = gr.Button('📂', scale=2)
+                    tb_indicator = gr.Textbox(label="Output", scale=4)
+                    files_subtitles = gr.Files(label="Downloadable output file", scale=4)
+                    btn_openfolder = gr.Button('📂', scale=1)

                 params = [mic_input, dd_model, dd_lang, dd_file_format, cb_translate]
                 advanced_params = [nb_beam_size, nb_log_prob_threshold, nb_no_speech_threshold, dd_compute_type]
                 btn_run.click(fn=self.whisper_inf.transcribe_mic,
                               inputs=params + advanced_params,
-                              outputs=[tb_indicator])
+                              outputs=[tb_indicator, files_subtitles])
                 btn_openfolder.click(fn=lambda: self.open_folder("outputs"), inputs=None, outputs=None)
                 dd_model.change(fn=self.on_change_models, inputs=[dd_model], outputs=[cb_translate])

             with gr.TabItem("T2T Translation"):  # tab 4
                 with gr.Row():
-                    file_subs = gr.Files(type="file", label="Upload Subtitle Files to translate here",
+                    file_subs = gr.Files(type="filepath", label="Upload Subtitle Files to translate here",
                                          file_types=['.vtt', '.srt'])

                 with gr.TabItem("NLLB"):  # sub tab1
@@ -164,14 +167,16 @@ class App:
                     with gr.Row():
                         btn_run = gr.Button("TRANSLATE SUBTITLE FILE", variant="primary")
                     with gr.Row():
-                        tb_indicator = gr.Textbox(label="Output", scale=8)
-                        btn_openfolder = gr.Button('📂', scale=2)
+                        tb_indicator = gr.Textbox(label="Output", scale=4)
+                        files_subtitles = gr.Files(label="Downloadable output file", scale=4)
+                        btn_openfolder = gr.Button('📂', scale=1)
                     with gr.Column():
                         md_vram_table = gr.HTML(NLLB_VRAM_TABLE, elem_id="md_nllb_vram_table")

                     btn_run.click(fn=self.nllb_inf.translate_file,
                                   inputs=[file_subs, dd_nllb_model, dd_nllb_sourcelang, dd_nllb_targetlang, cb_timestamp],
-                                  outputs=[tb_indicator])
+                                  outputs=[tb_indicator, files_subtitles])
+
                     btn_openfolder.click(fn=lambda: self.open_folder(os.path.join("outputs", "translations")),
                                          inputs=None,
                                          outputs=None)
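Note on the app.py changes: they track the Gradio pin bump in requirements.txt below. Gradio 4 drops type="file" on file components ("filepath" is the replacement), and each tab gains a files_subtitles component so generated subtitles can be downloaded from the browser instead of only being written to the outputs folder; each handler therefore registers two outputs. A minimal sketch of the pattern, not the repo's code (the echo_files handler and component wiring here are illustrative):

import gradio as gr

def echo_files(files):
    # with type="filepath", Gradio 4 passes a list of temp-file paths
    return f"Received {len(files)} file(s)", files  # -> Textbox, Files

with gr.Blocks() as demo:
    with gr.Row():
        input_file = gr.Files(type="filepath", label="Upload File here")
    with gr.Row():
        btn_run = gr.Button("RUN", variant="primary")
    with gr.Row():
        tb_indicator = gr.Textbox(label="Output", scale=4)
        files_subtitles = gr.Files(label="Downloadable output file", scale=4, interactive=False)
    btn_run.click(fn=echo_files, inputs=[input_file], outputs=[tb_indicator, files_subtitles])

if __name__ == "__main__":
    demo.launch()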
modules/faster_whisper_inference.py CHANGED
@@ -42,7 +42,7 @@ class FasterWhisperInference(BaseInterface):
                         no_speech_threshold: float,
                         compute_type: str,
                         progress=gr.Progress()
-                        ) -> str:
+                        ) -> list:
         """
         Write subtitle file from Files

@@ -78,7 +78,9 @@ class FasterWhisperInference(BaseInterface):

         Returns
         ----------
+        A List of
         String to return to gr.Textbox()
+        Files to return to gr.Files()
         """
         try:
             self.update_model_if_needed(model_size=model_size, compute_type=compute_type, progress=progress)
@@ -95,16 +97,15 @@ class FasterWhisperInference(BaseInterface):
                 progress=progress
             )

-            file_name, file_ext = os.path.splitext(os.path.basename(fileobj.orig_name))
+            file_name, file_ext = os.path.splitext(os.path.basename(fileobj.name))
             file_name = safe_filename(file_name)
-            subtitle = self.generate_and_write_file(
+            subtitle, file_path = self.generate_and_write_file(
                 file_name=file_name,
                 transcribed_segments=transcribed_segments,
                 add_timestamp=add_timestamp,
                 file_format=file_format
             )
-            print(f"{subtitle}")
-            files_info[file_name] = {"subtitle": subtitle, "time_for_task": time_for_task}
+            files_info[file_name] = {"subtitle": subtitle, "time_for_task": time_for_task, "path": file_path}

             total_result = ''
             total_time = 0
@@ -114,7 +115,10 @@ class FasterWhisperInference(BaseInterface):
                 total_result += f'{info["subtitle"]}'
                 total_time += info["time_for_task"]

-            return f"Done in {self.format_time(total_time)}! Subtitle is in the outputs folder.\n\n{total_result}"
+            gr_str = f"Done in {self.format_time(total_time)}! Subtitle is in the outputs folder.\n\n{total_result}"
+            gr_file_path = [info['path'] for info in files_info.values()]
+
+            return [gr_str, gr_file_path]

         except Exception as e:
             print(f"Error transcribing file on line {e}")
@@ -134,7 +138,7 @@ class FasterWhisperInference(BaseInterface):
                            no_speech_threshold: float,
                            compute_type: str,
                            progress=gr.Progress()
-                           ) -> str:
+                           ) -> list:
         """
         Write subtitle file from Youtube

@@ -170,7 +174,9 @@ class FasterWhisperInference(BaseInterface):

         Returns
         ----------
+        A List of
         String to return to gr.Textbox()
+        Files to return to gr.Files()
         """
         try:
             self.update_model_if_needed(model_size=model_size, compute_type=compute_type, progress=progress)
@@ -192,15 +198,18 @@ class FasterWhisperInference(BaseInterface):
             progress(1, desc="Completed!")

             file_name = safe_filename(yt.title)
-            subtitle = self.generate_and_write_file(
+            subtitle, file_path = self.generate_and_write_file(
                 file_name=file_name,
                 transcribed_segments=transcribed_segments,
                 add_timestamp=add_timestamp,
                 file_format=file_format
             )
-            return f"Done in {self.format_time(time_for_task)}! Subtitle file is in the outputs folder.\n\n{subtitle}"
+            gr_str = f"Done in {self.format_time(time_for_task)}! Subtitle file is in the outputs folder.\n\n{subtitle}"
+
+            return [gr_str, file_path]
+
         except Exception as e:
-            return f"Error: {str(e)}"
+            print(f"Error transcribing file on line {e}")
         finally:
             try:
                 if 'yt' not in locals():
@@ -225,7 +234,7 @@ class FasterWhisperInference(BaseInterface):
                        no_speech_threshold: float,
                        compute_type: str,
                        progress=gr.Progress()
-                       ) -> str:
+                       ) -> list:
         """
         Write subtitle file from microphone

@@ -259,7 +268,9 @@ class FasterWhisperInference(BaseInterface):

         Returns
         ----------
+        A List of
         String to return to gr.Textbox()
+        Files to return to gr.Files()
         """
         try:
             self.update_model_if_needed(model_size=model_size, compute_type=compute_type, progress=progress)
@@ -277,15 +288,17 @@ class FasterWhisperInference(BaseInterface):
             )
             progress(1, desc="Completed!")

-            subtitle = self.generate_and_write_file(
+            subtitle, file_path = self.generate_and_write_file(
                 file_name="Mic",
                 transcribed_segments=transcribed_segments,
                 add_timestamp=True,
                 file_format=file_format
             )
-            return f"Done in {self.format_time(time_for_task)}! Subtitle file is in the outputs folder.\n\n{subtitle}"
+
+            gr_str = f"Done in {self.format_time(time_for_task)}! Subtitle file is in the outputs folder.\n\n{subtitle}"
+            return [gr_str, file_path]
         except Exception as e:
-            return f"Error: {str(e)}"
+            print(f"Error transcribing file on line {e}")
         finally:
             self.release_cuda_memory()
             self.remove_input_files([micaudio])
@@ -395,16 +408,19 @@ class FasterWhisperInference(BaseInterface):

         if file_format == "SRT":
             content = get_srt(transcribed_segments)
-            write_file(content, f"{output_path}.srt")
+            output_path += '.srt'
+            write_file(content, output_path)

         elif file_format == "WebVTT":
             content = get_vtt(transcribed_segments)
-            write_file(content, f"{output_path}.vtt")
+            output_path += '.vtt'
+            write_file(content, output_path)

         elif file_format == "txt":
             content = get_txt(transcribed_segments)
-            write_file(content, f"{output_path}.txt")
-        return content
+            output_path += '.txt'
+            write_file(content, output_path)
+        return content, output_path

     @staticmethod
     def format_time(elapsed_time: float) -> str:
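The inference-side counterpart of the UI change: generate_and_write_file now appends the extension to output_path before writing, so it can return a (content, output_path) tuple, and every transcribe_* handler forwards that path as the second element of its return list. The fileobj.orig_name read also becomes fileobj.name, since Gradio 4 uploads no longer expose orig_name. A simplified stand-in for the helper, assuming the same three formats (this is not the module's actual code; it collapses get_srt/get_vtt/get_txt into a pre-built content string):

import os

def generate_and_write_file(file_name: str, content: str, file_format: str) -> tuple:
    # simplified stand-in, not the module's actual helper
    os.makedirs("outputs", exist_ok=True)
    output_path = os.path.join("outputs", file_name)
    output_path += {"SRT": ".srt", "WebVTT": ".vtt", "txt": ".txt"}[file_format]
    with open(output_path, "w", encoding="utf-8") as f:  # the repo uses write_file()
        f.write(content)
    return content, output_path  # content -> gr.Textbox, output_path -> gr.Files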
modules/nllb_inference.py CHANGED
@@ -34,7 +34,7 @@ class NLLBInference(BaseInterface):
                        src_lang: str,
                        tgt_lang: str,
                        add_timestamp: bool,
-                       progress=gr.Progress()):
+                       progress=gr.Progress()) -> list:
         """
         Translate subtitle file from source language to target language

@@ -53,6 +53,12 @@ class NLLBInference(BaseInterface):
         progress: gr.Progress
             Indicator to show progress directly in gradio.
             I use a forked version of whisper for this. To see more info : https://github.com/jhj0517/jhj0517-whisper/tree/add-progress-callback
+
+        Returns
+        ----------
+        A List of
+        String to return to gr.Textbox()
+        Files to return to gr.Files()
         """
         try:
             if model_size != self.current_model_size or self.model is None:
@@ -92,8 +98,9 @@ class NLLBInference(BaseInterface):
                     output_path = os.path.join("outputs", "translations", f"{file_name}-{timestamp}")
                 else:
                     output_path = os.path.join("outputs", "translations", f"{file_name}")
+                output_path += '.srt'

-                write_file(subtitle, f"{output_path}.srt")
+                write_file(subtitle, output_path)

             elif file_ext == ".vtt":
                 parsed_dicts = parse_vtt(file_path=file_path)
@@ -109,8 +116,9 @@ class NLLBInference(BaseInterface):
                     output_path = os.path.join("outputs", "translations", f"{file_name}-{timestamp}")
                 else:
                     output_path = os.path.join("outputs", "translations", f"{file_name}")
+                output_path += '.vtt'

-                write_file(subtitle, f"{output_path}.vtt")
+                write_file(subtitle, output_path)

             files_info[file_name] = subtitle

@@ -120,9 +128,10 @@ class NLLBInference(BaseInterface):
                 total_result += f'{file_name}\n\n'
                 total_result += f'{subtitle}'

-            return f"Done! Subtitle is in the outputs/translation folder.\n\n{total_result}"
+            gr_str = f"Done! Subtitle is in the outputs/translation folder.\n\n{total_result}"
+            return [gr_str, output_path]
         except Exception as e:
-            return f"Error: {str(e)}"
+            print(f"Error: {str(e)}")
         finally:
             self.release_cuda_memory()
             self.remove_input_files([fileobj.name for fileobj in fileobjs])
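The NLLB path follows the same contract, with one wrinkle visible in the diff: output_path is reassigned per input file, so the returned [gr_str, output_path] carries only the last translated file's path (unlike transcribe_file, which collects every path from files_info). A self-contained reduction of one iteration; write_file stands in for the repo helper, translate_fn stands in for the NLLB pipeline, and the real code translates cue-by-cue via parse_srt/parse_vtt rather than whole-file:

import os

def write_file(text: str, path: str) -> None:
    # stand-in for the repo's write_file helper
    with open(path, "w", encoding="utf-8") as f:
        f.write(text)

def translate_one(file_path: str, translate_fn) -> tuple:
    os.makedirs(os.path.join("outputs", "translations"), exist_ok=True)
    file_name, file_ext = os.path.splitext(os.path.basename(file_path))
    with open(file_path, encoding="utf-8") as f:
        subtitle = translate_fn(f.read())  # whole-file here; cue-by-cue in the repo
    output_path = os.path.join("outputs", "translations", file_name)
    output_path += file_ext                # extension first, so the final path is returned
    write_file(subtitle, output_path)
    return subtitle, output_path           # text for gr.Textbox, path for gr.Files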
modules/whisper_Inference.py CHANGED
@@ -37,7 +37,7 @@ class WhisperInference(BaseInterface):
                         log_prob_threshold: float,
                         no_speech_threshold: float,
                         compute_type: str,
-                        progress=gr.Progress()):
+                        progress=gr.Progress()) -> list:
         """
         Write subtitle file from Files

@@ -70,8 +70,13 @@ class WhisperInference(BaseInterface):
         progress: gr.Progress
             Indicator to show progress directly in gradio.
             I use a forked version of whisper for this. To see more info : https://github.com/jhj0517/jhj0517-whisper/tree/add-progress-callback
-        """

+        Returns
+        ----------
+        A List of
+        String to return to gr.Textbox()
+        Files to return to gr.Files()
+        """
         try:
             self.update_model_if_needed(model_size=model_size, compute_type=compute_type, progress=progress)

@@ -91,16 +96,15 @@ class WhisperInference(BaseInterface):
             )
             progress(1, desc="Completed!")

-            file_name, file_ext = os.path.splitext(os.path.basename(fileobj.orig_name))
+            file_name, file_ext = os.path.splitext(os.path.basename(fileobj.name))
             file_name = safe_filename(file_name)
-            subtitle = self.generate_and_write_file(
+            subtitle, file_path = self.generate_and_write_file(
                 file_name=file_name,
                 transcribed_segments=result,
                 add_timestamp=add_timestamp,
                 file_format=file_format
             )
-
-            files_info[file_name] = {"subtitle": subtitle, "elapsed_time": elapsed_time}
+            files_info[file_name] = {"subtitle": subtitle, "elapsed_time": elapsed_time, "path": file_path}

             total_result = ''
             total_time = 0
@@ -110,10 +114,12 @@ class WhisperInference(BaseInterface):
                 total_result += f"{info['subtitle']}"
                 total_time += info["elapsed_time"]

-            return f"Done in {self.format_time(total_time)}! Subtitle is in the outputs folder.\n\n{total_result}"
+            gr_str = f"Done in {self.format_time(total_time)}! Subtitle is in the outputs folder.\n\n{total_result}"
+            gr_file_path = [info['path'] for info in files_info.values()]
+
+            return [gr_str, gr_file_path]
         except Exception as e:
             print(f"Error transcribing file: {str(e)}")
-            return f"Error transcribing file: {str(e)}"
         finally:
             self.release_cuda_memory()
             self.remove_input_files([fileobj.name for fileobj in fileobjs])
@@ -129,7 +135,7 @@ class WhisperInference(BaseInterface):
                            log_prob_threshold: float,
                            no_speech_threshold: float,
                            compute_type: str,
-                           progress=gr.Progress()):
+                           progress=gr.Progress()) -> list:
         """
         Write subtitle file from Youtube

@@ -162,6 +168,12 @@ class WhisperInference(BaseInterface):
         progress: gr.Progress
             Indicator to show progress directly in gradio.
             I use a forked version of whisper for this. To see more info : https://github.com/jhj0517/jhj0517-whisper/tree/add-progress-callback
+
+        Returns
+        ----------
+        A List of
+        String to return to gr.Textbox()
+        Files to return to gr.Files()
         """
         try:
             self.update_model_if_needed(model_size=model_size, compute_type=compute_type, progress=progress)
@@ -181,17 +193,17 @@ class WhisperInference(BaseInterface):
             progress(1, desc="Completed!")

             file_name = safe_filename(yt.title)
-            subtitle = self.generate_and_write_file(
+            subtitle, file_path = self.generate_and_write_file(
                 file_name=file_name,
                 transcribed_segments=result,
                 add_timestamp=add_timestamp,
                 file_format=file_format
             )

-            return f"Done in {self.format_time(elapsed_time)}! Subtitle file is in the outputs folder.\n\n{subtitle}"
+            gr_str = f"Done in {self.format_time(elapsed_time)}! Subtitle file is in the outputs folder.\n\n{subtitle}"
+            return [gr_str, file_path]
         except Exception as e:
             print(f"Error transcribing youtube video: {str(e)}")
-            return f"Error transcribing youtube video: {str(e)}"
         finally:
             try:
                 if 'yt' not in locals():
@@ -215,7 +227,7 @@ class WhisperInference(BaseInterface):
                        log_prob_threshold: float,
                        no_speech_threshold: float,
                        compute_type: str,
-                       progress=gr.Progress()):
+                       progress=gr.Progress()) -> list:
         """
         Write subtitle file from microphone

@@ -246,8 +258,13 @@ class WhisperInference(BaseInterface):
         progress: gr.Progress
             Indicator to show progress directly in gradio.
             I use a forked version of whisper for this. To see more info : https://github.com/jhj0517/jhj0517-whisper/tree/add-progress-callback
-        """

+        Returns
+        ----------
+        A List of
+        String to return to gr.Textbox()
+        Files to return to gr.Files()
+        """
         try:
             self.update_model_if_needed(model_size=model_size, compute_type=compute_type, progress=progress)

@@ -261,17 +278,17 @@ class WhisperInference(BaseInterface):
                 progress=progress)
             progress(1, desc="Completed!")

-            subtitle = self.generate_and_write_file(
+            subtitle, file_path = self.generate_and_write_file(
                 file_name="Mic",
                 transcribed_segments=result,
                 add_timestamp=True,
                 file_format=file_format
             )

-            return f"Done in {self.format_time(elapsed_time)}! Subtitle file is in the outputs folder.\n\n{subtitle}"
+            gr_str = f"Done in {self.format_time(elapsed_time)}! Subtitle file is in the outputs folder.\n\n{subtitle}"
+            return [gr_str, file_path]
         except Exception as e:
             print(f"Error transcribing mic: {str(e)}")
-            return f"Error transcribing mic: {str(e)}"
         finally:
             self.release_cuda_memory()
             self.remove_input_files([micaudio])
@@ -377,16 +394,19 @@ class WhisperInference(BaseInterface):

         if file_format == "SRT":
             content = get_srt(transcribed_segments)
-            write_file(content, f"{output_path}.srt")
+            output_path += '.srt'
+            write_file(content, output_path)

         elif file_format == "WebVTT":
             content = get_vtt(transcribed_segments)
-            write_file(content, f"{output_path}.vtt")
+            output_path += '.vtt'
+            write_file(content, output_path)

         elif file_format == "txt":
             content = get_txt(transcribed_segments)
-            write_file(content, f"{output_path}.vtt")
-        return content
+            output_path += '.txt'
+            write_file(content, output_path)
+        return content, output_path

     @staticmethod
     def format_time(elapsed_time: float) -> str:
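whisper_Inference.py mirrors the faster-whisper changes one-for-one. Worth noticing in the last hunk: the removed txt branch wrote f"{output_path}.vtt", so deriving the suffix once and reusing the same variable also fixes a wrong-extension bug. A tiny illustration of why the new shape is safer (paths are illustrative):

import os

output_path = os.path.join("outputs", "Mic-2024")
old_path = f"{output_path}.vtt"  # old txt branch: suffix typed at the call site, and mistyped
output_path += ".txt"            # new: the value that names the file is also the value returned
assert old_path.endswith(".vtt") and output_path.endswith(".txt")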
requirements.txt CHANGED
@@ -3,5 +3,5 @@ torch
 git+https://github.com/jhj0517/jhj0517-whisper.git
 faster-whisper
 transformers
-gradio==3.37.0
+gradio==4.14.0
 pytube
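The pin jumps a major release, which is what drives everything above: Gradio 4 removes type="file" on file components and, as the orig_name-to-name swap in the diff suggests, the orig_name attribute on uploads. A quick stdlib-only sanity check after installing:

import gradio as gr

# this revision assumes Gradio 4 semantics: gr.Files(type="filepath"),
# uploads exposing .name, and handlers returning one value per output component
major = int(gr.__version__.split(".")[0])
assert major >= 4, f"expected gradio>=4 for this revision, found {gr.__version__}"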