jhj0517 committed
Commit 7e8138f
1 Parent(s): b2f7849

implement txt file format in `faster_whisper_inference.py`

modules/faster_whisper_inference.py CHANGED
@@ -13,7 +13,7 @@ import torch
 import gradio as gr
 
 from .base_interface import BaseInterface
-from modules.subtitle_manager import get_srt, get_vtt, write_file, safe_filename
+from modules.subtitle_manager import get_srt, get_vtt, get_txt, write_file, safe_filename
 from modules.youtube_manager import get_ytdata, get_ytaudio
 
 
@@ -34,7 +34,7 @@ class FasterWhisperInference(BaseInterface):
                         fileobjs: list,
                         model_size: str,
                         lang: str,
-                        subformat: str,
+                        file_format: str,
                         istranslate: bool,
                         add_timestamp: bool,
                         beam_size: int,
@@ -54,8 +54,8 @@ class FasterWhisperInference(BaseInterface):
            Whisper model size from gr.Dropdown()
        lang: str
            Source language of the file to transcribe from gr.Dropdown()
-       subformat: str
-           Subtitle format to write from gr.Dropdown(). Supported format: [SRT, WebVTT]
+       file_format: str
+           File format to write from gr.Dropdown(). Supported format: [SRT, WebVTT, txt]
        istranslate: bool
            Boolean value from gr.Checkbox() that determines whether to translate to English.
            It's Whisper's feature to translate speech from another language directly into English end-to-end.
@@ -97,12 +97,13 @@ class FasterWhisperInference(BaseInterface):
 
                file_name, file_ext = os.path.splitext(os.path.basename(fileobj.orig_name))
                file_name = safe_filename(file_name)
-               subtitle = self.generate_and_write_subtitle(
+               subtitle = self.generate_and_write_file(
                    file_name=file_name,
                    transcribed_segments=transcribed_segments,
                    add_timestamp=add_timestamp,
-                   subformat=subformat
+                   file_format=file_format
                )
+               print(f"{subtitle}")
                files_info[file_name] = {"subtitle": subtitle, "time_for_task": time_for_task}
 
            total_result = ''
@@ -125,7 +126,7 @@ class FasterWhisperInference(BaseInterface):
                            youtubelink: str,
                            model_size: str,
                            lang: str,
-                           subformat: str,
+                           file_format: str,
                            istranslate: bool,
                            add_timestamp: bool,
                            beam_size: int,
@@ -145,8 +146,8 @@ class FasterWhisperInference(BaseInterface):
            Whisper model size from gr.Dropdown()
        lang: str
            Source language of the file to transcribe from gr.Dropdown()
-       subformat: str
-           Subtitle format to write from gr.Dropdown(). Supported format: [SRT, WebVTT]
+       file_format: str
+           File format to write from gr.Dropdown(). Supported format: [SRT, WebVTT, txt]
        istranslate: bool
            Boolean value from gr.Checkbox() that determines whether to translate to English.
            It's Whisper's feature to translate speech from another language directly into English end-to-end.
@@ -191,11 +192,11 @@ class FasterWhisperInference(BaseInterface):
            progress(1, desc="Completed!")
 
            file_name = safe_filename(yt.title)
-           subtitle = self.generate_and_write_subtitle(
+           subtitle = self.generate_and_write_file(
                file_name=file_name,
                transcribed_segments=transcribed_segments,
                add_timestamp=add_timestamp,
-               subformat=subformat
+               file_format=file_format
            )
            return f"Done in {self.format_time(time_for_task)}! Subtitle file is in the outputs folder.\n\n{subtitle}"
        except Exception as e:
@@ -217,7 +218,7 @@ class FasterWhisperInference(BaseInterface):
                        micaudio: str,
                        model_size: str,
                        lang: str,
-                       subformat: str,
+                       file_format: str,
                        istranslate: bool,
                        beam_size: int,
                        log_prob_threshold: float,
@@ -236,8 +237,8 @@ class FasterWhisperInference(BaseInterface):
            Whisper model size from gr.Dropdown()
        lang: str
            Source language of the file to transcribe from gr.Dropdown()
-       subformat: str
-           Subtitle format to write from gr.Dropdown(). Supported format: [SRT, WebVTT]
+       file_format: str
+           File format to write from gr.Dropdown(). Supported format: [SRT, WebVTT, txt]
        istranslate: bool
            Boolean value from gr.Checkbox() that determines whether to translate to English.
            It's Whisper's feature to translate speech from another language directly into English end-to-end.
@@ -276,11 +277,11 @@ class FasterWhisperInference(BaseInterface):
            )
            progress(1, desc="Completed!")
 
-           subtitle = self.generate_and_write_subtitle(
+           subtitle = self.generate_and_write_file(
                file_name="Mic",
                transcribed_segments=transcribed_segments,
                add_timestamp=True,
-               subformat=subformat
+               file_format=file_format
            )
            return f"Done in {self.format_time(time_for_task)}! Subtitle file is in the outputs folder.\n\n{subtitle}"
        except Exception as e:
@@ -378,11 +379,11 @@ class FasterWhisperInference(BaseInterface):
            )
 
    @staticmethod
-   def generate_and_write_subtitle(file_name: str,
-                                   transcribed_segments: list,
-                                   add_timestamp: bool,
-                                   subformat: str,
-                                   ) -> str:
+   def generate_and_write_file(file_name: str,
+                               transcribed_segments: list,
+                               add_timestamp: bool,
+                               file_format: str,
+                               ) -> str:
        """
        This method writes subtitle file and returns str to gr.Textbox
        """
@@ -392,13 +393,18 @@
        else:
            output_path = os.path.join("outputs", f"{file_name}")
 
-       if subformat == "SRT":
-           subtitle = get_srt(transcribed_segments)
-           write_file(subtitle, f"{output_path}.srt")
-       elif subformat == "WebVTT":
-           subtitle = get_vtt(transcribed_segments)
-           write_file(subtitle, f"{output_path}.vtt")
-       return subtitle
+       if file_format == "SRT":
+           content = get_srt(transcribed_segments)
+           write_file(content, f"{output_path}.srt")
+
+       elif file_format == "WebVTT":
+           content = get_vtt(transcribed_segments)
+           write_file(content, f"{output_path}.vtt")
+
+       elif file_format == "txt":
+           content = get_txt(transcribed_segments)
+           write_file(content, f"{output_path}.txt")
+       return content
 
    @staticmethod
    def format_time(elapsed_time: float) -> str:
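
For illustration only (this call is not part of the commit): the renamed generate_and_write_file staticmethod could presumably be exercised directly as below, assuming an "outputs" folder exists next to the script and that segments are dicts carrying "start", "end" and "text" keys.

from modules.faster_whisper_inference import FasterWhisperInference

# Hypothetical segments, shaped like faster-whisper output converted to dicts.
segments = [{"start": 0.0, "end": 1.5, "text": " Hello world"}]

content = FasterWhisperInference.generate_and_write_file(
    file_name="example",
    transcribed_segments=segments,
    add_timestamp=False,   # False takes the plain "outputs/<file_name>" path shown above
    file_format="txt",     # "SRT" and "WebVTT" remain supported
)
print(content)  # -> Hello world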
modules/subtitle_manager.py CHANGED
@@ -44,6 +44,15 @@ def get_vtt(segments):
     return output
 
 
+def get_txt(segments):
+    output = ""
+    for i, segment in enumerate(segments):
+        if segment['text'].startswith(' '):
+            segment['text'] = segment['text'][1:]
+        output += f"{segment['text']}\n"
+    return output
+
+
 def parse_srt(file_path):
     """Reads SRT file and returns as dict"""
     with open(file_path, 'r', encoding='utf-8') as file:
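
A quick sketch of the new get_txt helper in isolation (the segment dicts below are illustrative; only the "text" key is read, and a single leading space is stripped from each line):

from modules.subtitle_manager import get_txt

# Illustrative segments; get_txt only looks at the "text" field.
segments = [
    {"start": 0.0, "end": 2.0, "text": " Hello there."},
    {"start": 2.0, "end": 4.0, "text": " General Kenobi."},
]

print(get_txt(segments))
# Hello there.
# General Kenobi.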