jhj0517 committed
Commit f02937a • 2 Parent(s): daf4d68 1f9180b

Merge pull request #76 from jhj0517/colab-button

app.py CHANGED
@@ -44,7 +44,7 @@ class App:
         with gr.Tabs():
             with gr.TabItem("File"):  # tab1
                 with gr.Row():
-                    input_file = gr.Files(type="file", label="Upload File here")
+                    input_file = gr.Files(type="filepath", label="Upload File here")
                 with gr.Row():
                     dd_model = gr.Dropdown(choices=self.whisper_inf.available_models, value="large-v3",
                                            label="Model")
@@ -63,14 +63,15 @@ class App:
                 with gr.Row():
                     btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
                 with gr.Row():
-                    tb_indicator = gr.Textbox(label="Output", scale=8)
-                    btn_openfolder = gr.Button('📂', scale=2)
+                    tb_indicator = gr.Textbox(label="Output", scale=4)
+                    files_subtitles = gr.Files(label="Downloadable output file", scale=4, interactive=False)
+                    btn_openfolder = gr.Button('📂', scale=1)

                 params = [input_file, dd_model, dd_lang, dd_file_format, cb_translate, cb_timestamp]
                 advanced_params = [nb_beam_size, nb_log_prob_threshold, nb_no_speech_threshold, dd_compute_type]
                 btn_run.click(fn=self.whisper_inf.transcribe_file,
                               inputs=params + advanced_params,
-                              outputs=[tb_indicator])
+                              outputs=[tb_indicator, files_subtitles])
                 btn_openfolder.click(fn=lambda: self.open_folder("outputs"), inputs=None, outputs=None)
                 dd_model.change(fn=self.on_change_models, inputs=[dd_model], outputs=[cb_translate])

@@ -102,14 +103,15 @@ class App:
                 with gr.Row():
                     btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
                 with gr.Row():
-                    tb_indicator = gr.Textbox(label="Output", scale=8)
-                    btn_openfolder = gr.Button('📂', scale=2)
+                    tb_indicator = gr.Textbox(label="Output", scale=4)
+                    files_subtitles = gr.Files(label="Downloadable output file", scale=4)
+                    btn_openfolder = gr.Button('📂', scale=1)

                 params = [tb_youtubelink, dd_model, dd_lang, dd_file_format, cb_translate, cb_timestamp]
                 advanced_params = [nb_beam_size, nb_log_prob_threshold, nb_no_speech_threshold, dd_compute_type]
                 btn_run.click(fn=self.whisper_inf.transcribe_youtube,
                               inputs=params + advanced_params,
-                              outputs=[tb_indicator])
+                              outputs=[tb_indicator, files_subtitles])
                 tb_youtubelink.change(get_ytmetas, inputs=[tb_youtubelink],
                                       outputs=[img_thumbnail, tb_title, tb_description])
                 btn_openfolder.click(fn=lambda: self.open_folder("outputs"), inputs=None, outputs=None)
@@ -134,20 +136,21 @@ class App:
                 with gr.Row():
                     btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
                 with gr.Row():
-                    tb_indicator = gr.Textbox(label="Output", scale=8)
-                    btn_openfolder = gr.Button('📂', scale=2)
+                    tb_indicator = gr.Textbox(label="Output", scale=4)
+                    files_subtitles = gr.Files(label="Downloadable output file", scale=4)
+                    btn_openfolder = gr.Button('📂', scale=1)

                 params = [mic_input, dd_model, dd_lang, dd_file_format, cb_translate]
                 advanced_params = [nb_beam_size, nb_log_prob_threshold, nb_no_speech_threshold, dd_compute_type]
                 btn_run.click(fn=self.whisper_inf.transcribe_mic,
                               inputs=params + advanced_params,
-                              outputs=[tb_indicator])
+                              outputs=[tb_indicator, files_subtitles])
                 btn_openfolder.click(fn=lambda: self.open_folder("outputs"), inputs=None, outputs=None)
                 dd_model.change(fn=self.on_change_models, inputs=[dd_model], outputs=[cb_translate])

             with gr.TabItem("T2T Translation"):  # tab 4
                 with gr.Row():
-                    file_subs = gr.Files(type="file", label="Upload Subtitle Files to translate here",
+                    file_subs = gr.Files(type="filepath", label="Upload Subtitle Files to translate here",
                                          file_types=['.vtt', '.srt'])

                 with gr.TabItem("NLLB"):  # sub tab1
@@ -164,14 +167,16 @@ class App:
                     with gr.Row():
                         btn_run = gr.Button("TRANSLATE SUBTITLE FILE", variant="primary")
                     with gr.Row():
-                        tb_indicator = gr.Textbox(label="Output", scale=8)
-                        btn_openfolder = gr.Button('📂', scale=2)
+                        tb_indicator = gr.Textbox(label="Output", scale=4)
+                        files_subtitles = gr.Files(label="Downloadable output file", scale=4)
+                        btn_openfolder = gr.Button('📂', scale=1)
                     with gr.Column():
                         md_vram_table = gr.HTML(NLLB_VRAM_TABLE, elem_id="md_nllb_vram_table")

                     btn_run.click(fn=self.nllb_inf.translate_file,
                                   inputs=[file_subs, dd_nllb_model, dd_nllb_sourcelang, dd_nllb_targetlang, cb_timestamp],
-                                  outputs=[tb_indicator])
+                                  outputs=[tb_indicator, files_subtitles])
+
                     btn_openfolder.click(fn=lambda: self.open_folder(os.path.join("outputs", "translations")),
                                          inputs=None,
                                          outputs=None)
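Note on the app.py changes: they track the Gradio pin bump in requirements.txt below. Gradio 4 drops type="file" on file components ("filepath" is the replacement), and each tab gains a files_subtitles component so generated subtitles can be downloaded from the browser instead of only being written to the outputs folder; each handler therefore registers two outputs. A minimal sketch of the pattern, not the repo's code (the echo_files handler and component wiring here are illustrative):

import gradio as gr

def echo_files(files):
    # with type="filepath", Gradio 4 passes a list of temp-file paths
    return f"Received {len(files)} file(s)", files  # -> Textbox, Files

with gr.Blocks() as demo:
    with gr.Row():
        input_file = gr.Files(type="filepath", label="Upload File here")
    with gr.Row():
        btn_run = gr.Button("RUN", variant="primary")
    with gr.Row():
        tb_indicator = gr.Textbox(label="Output", scale=4)
        files_subtitles = gr.Files(label="Downloadable output file", scale=4, interactive=False)
    btn_run.click(fn=echo_files, inputs=[input_file], outputs=[tb_indicator, files_subtitles])

if __name__ == "__main__":
    demo.launch()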
modules/faster_whisper_inference.py CHANGED
@@ -42,7 +42,7 @@ class FasterWhisperInference(BaseInterface):
                         no_speech_threshold: float,
                         compute_type: str,
                         progress=gr.Progress()
-                        ) -> str:
+                        ) -> list:
         """
         Write subtitle file from Files

@@ -78,7 +78,9 @@ class FasterWhisperInference(BaseInterface):

         Returns
         ----------
+        A List of
         String to return to gr.Textbox()
+        Files to return to gr.Files()
         """
         try:
             self.update_model_if_needed(model_size=model_size, compute_type=compute_type, progress=progress)
@@ -95,16 +97,15 @@ class FasterWhisperInference(BaseInterface):
                 progress=progress
             )

-            file_name, file_ext = os.path.splitext(os.path.basename(fileobj.orig_name))
+            file_name, file_ext = os.path.splitext(os.path.basename(fileobj.name))
             file_name = safe_filename(file_name)
-            subtitle = self.generate_and_write_file(
+            subtitle, file_path = self.generate_and_write_file(
                 file_name=file_name,
                 transcribed_segments=transcribed_segments,
                 add_timestamp=add_timestamp,
                 file_format=file_format
             )
-            print(f"{subtitle}")
-            files_info[file_name] = {"subtitle": subtitle, "time_for_task": time_for_task}
+            files_info[file_name] = {"subtitle": subtitle, "time_for_task": time_for_task, "path": file_path}

             total_result = ''
             total_time = 0
@@ -114,7 +115,10 @@ class FasterWhisperInference(BaseInterface):
                 total_result += f'{info["subtitle"]}'
                 total_time += info["time_for_task"]

-            return f"Done in {self.format_time(total_time)}! Subtitle is in the outputs folder.\n\n{total_result}"
+            gr_str = f"Done in {self.format_time(total_time)}! Subtitle is in the outputs folder.\n\n{total_result}"
+            gr_file_path = [info['path'] for info in files_info.values()]
+
+            return [gr_str, gr_file_path]

         except Exception as e:
             print(f"Error transcribing file on line {e}")
@@ -134,7 +138,7 @@ class FasterWhisperInference(BaseInterface):
                            no_speech_threshold: float,
                            compute_type: str,
                            progress=gr.Progress()
-                           ) -> str:
+                           ) -> list:
         """
         Write subtitle file from Youtube

@@ -170,7 +174,9 @@ class FasterWhisperInference(BaseInterface):

         Returns
         ----------
+        A List of
         String to return to gr.Textbox()
+        Files to return to gr.Files()
         """
         try:
             self.update_model_if_needed(model_size=model_size, compute_type=compute_type, progress=progress)
@@ -192,15 +198,18 @@ class FasterWhisperInference(BaseInterface):
             progress(1, desc="Completed!")

             file_name = safe_filename(yt.title)
-            subtitle = self.generate_and_write_file(
+            subtitle, file_path = self.generate_and_write_file(
                 file_name=file_name,
                 transcribed_segments=transcribed_segments,
                 add_timestamp=add_timestamp,
                 file_format=file_format
             )
-            return f"Done in {self.format_time(time_for_task)}! Subtitle file is in the outputs folder.\n\n{subtitle}"
+            gr_str = f"Done in {self.format_time(time_for_task)}! Subtitle file is in the outputs folder.\n\n{subtitle}"
+
+            return [gr_str, file_path]
+
         except Exception as e:
-            return f"Error: {str(e)}"
+            print(f"Error transcribing file on line {e}")
         finally:
             try:
                 if 'yt' not in locals():
@@ -225,7 +234,7 @@ class FasterWhisperInference(BaseInterface):
                        no_speech_threshold: float,
                        compute_type: str,
                        progress=gr.Progress()
-                       ) -> str:
+                       ) -> list:
         """
         Write subtitle file from microphone

@@ -259,7 +268,9 @@ class FasterWhisperInference(BaseInterface):

         Returns
         ----------
+        A List of
         String to return to gr.Textbox()
+        Files to return to gr.Files()
         """
         try:
             self.update_model_if_needed(model_size=model_size, compute_type=compute_type, progress=progress)
@@ -277,15 +288,17 @@ class FasterWhisperInference(BaseInterface):
             )
             progress(1, desc="Completed!")

-            subtitle = self.generate_and_write_file(
+            subtitle, file_path = self.generate_and_write_file(
                 file_name="Mic",
                 transcribed_segments=transcribed_segments,
                 add_timestamp=True,
                 file_format=file_format
             )
-            return f"Done in {self.format_time(time_for_task)}! Subtitle file is in the outputs folder.\n\n{subtitle}"
+
+            gr_str = f"Done in {self.format_time(time_for_task)}! Subtitle file is in the outputs folder.\n\n{subtitle}"
+            return [gr_str, file_path]
         except Exception as e:
-            return f"Error: {str(e)}"
+            print(f"Error transcribing file on line {e}")
         finally:
             self.release_cuda_memory()
             self.remove_input_files([micaudio])
@@ -395,16 +408,19 @@ class FasterWhisperInference(BaseInterface):

         if file_format == "SRT":
             content = get_srt(transcribed_segments)
-            write_file(content, f"{output_path}.srt")
+            output_path += '.srt'
+            write_file(content, output_path)

         elif file_format == "WebVTT":
             content = get_vtt(transcribed_segments)
-            write_file(content, f"{output_path}.vtt")
+            output_path += '.vtt'
+            write_file(content, output_path)

         elif file_format == "txt":
             content = get_txt(transcribed_segments)
-            write_file(content, f"{output_path}.txt")
-        return content
+            output_path += '.txt'
+            write_file(content, output_path)
+        return content, output_path

     @staticmethod
     def format_time(elapsed_time: float) -> str:
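The inference-side counterpart of the UI change: generate_and_write_file now appends the extension to output_path before writing, so it can return a (content, output_path) tuple, and every transcribe_* handler forwards that path as the second element of its return list. The fileobj.orig_name read also becomes fileobj.name, since Gradio 4 uploads no longer expose orig_name. A simplified stand-in for the helper, assuming the same three formats (this is not the module's actual code; it collapses get_srt/get_vtt/get_txt into a pre-built content string):

import os

def generate_and_write_file(file_name: str, content: str, file_format: str) -> tuple:
    # simplified stand-in, not the module's actual helper
    os.makedirs("outputs", exist_ok=True)
    output_path = os.path.join("outputs", file_name)
    output_path += {"SRT": ".srt", "WebVTT": ".vtt", "txt": ".txt"}[file_format]
    with open(output_path, "w", encoding="utf-8") as f:  # the repo uses write_file()
        f.write(content)
    return content, output_path  # content -> gr.Textbox, output_path -> gr.Files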
modules/nllb_inference.py CHANGED
@@ -34,7 +34,7 @@ class NLLBInference(BaseInterface):
                        src_lang: str,
                        tgt_lang: str,
                        add_timestamp: bool,
-                       progress=gr.Progress()):
+                       progress=gr.Progress()) -> list:
         """
         Translate subtitle file from source language to target language

@@ -53,6 +53,12 @@ class NLLBInference(BaseInterface):
         progress: gr.Progress
             Indicator to show progress directly in gradio.
             I use a forked version of whisper for this. To see more info : https://github.com/jhj0517/jhj0517-whisper/tree/add-progress-callback
+
+        Returns
+        ----------
+        A List of
+        String to return to gr.Textbox()
+        Files to return to gr.Files()
         """
         try:
             if model_size != self.current_model_size or self.model is None:
@@ -92,8 +98,9 @@ class NLLBInference(BaseInterface):
                     output_path = os.path.join("outputs", "translations", f"{file_name}-{timestamp}")
                 else:
                     output_path = os.path.join("outputs", "translations", f"{file_name}")
+                output_path += '.srt'

-                write_file(subtitle, f"{output_path}.srt")
+                write_file(subtitle, output_path)

             elif file_ext == ".vtt":
                 parsed_dicts = parse_vtt(file_path=file_path)
@@ -109,8 +116,9 @@ class NLLBInference(BaseInterface):
                     output_path = os.path.join("outputs", "translations", f"{file_name}-{timestamp}")
                 else:
                     output_path = os.path.join("outputs", "translations", f"{file_name}")
+                output_path += '.vtt'

-                write_file(subtitle, f"{output_path}.vtt")
+                write_file(subtitle, output_path)

             files_info[file_name] = subtitle

@@ -120,9 +128,10 @@ class NLLBInference(BaseInterface):
                 total_result += f'{file_name}\n\n'
                 total_result += f'{subtitle}'

-            return f"Done! Subtitle is in the outputs/translation folder.\n\n{total_result}"
+            gr_str = f"Done! Subtitle is in the outputs/translation folder.\n\n{total_result}"
+            return [gr_str, output_path]
         except Exception as e:
-            return f"Error: {str(e)}"
+            print(f"Error: {str(e)}")
         finally:
             self.release_cuda_memory()
             self.remove_input_files([fileobj.name for fileobj in fileobjs])
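The NLLB path follows the same contract, with one wrinkle visible in the diff: output_path is reassigned per input file, so the returned [gr_str, output_path] carries only the last translated file's path (unlike transcribe_file, which collects every path from files_info). A self-contained reduction of one iteration; write_file stands in for the repo helper, translate_fn stands in for the NLLB pipeline, and the real code translates cue-by-cue via parse_srt/parse_vtt rather than whole-file:

import os

def write_file(text: str, path: str) -> None:
    # stand-in for the repo's write_file helper
    with open(path, "w", encoding="utf-8") as f:
        f.write(text)

def translate_one(file_path: str, translate_fn) -> tuple:
    os.makedirs(os.path.join("outputs", "translations"), exist_ok=True)
    file_name, file_ext = os.path.splitext(os.path.basename(file_path))
    with open(file_path, encoding="utf-8") as f:
        subtitle = translate_fn(f.read())  # whole-file here; cue-by-cue in the repo
    output_path = os.path.join("outputs", "translations", file_name)
    output_path += file_ext                # extension first, so the final path is returned
    write_file(subtitle, output_path)
    return subtitle, output_path           # text for gr.Textbox, path for gr.Files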
modules/whisper_Inference.py CHANGED
@@ -37,7 +37,7 @@ class WhisperInference(BaseInterface):
                         log_prob_threshold: float,
                         no_speech_threshold: float,
                         compute_type: str,
-                        progress=gr.Progress()):
+                        progress=gr.Progress()) -> list:
         """
         Write subtitle file from Files

@@ -70,8 +70,13 @@ class WhisperInference(BaseInterface):
         progress: gr.Progress
             Indicator to show progress directly in gradio.
             I use a forked version of whisper for this. To see more info : https://github.com/jhj0517/jhj0517-whisper/tree/add-progress-callback
-        """

+        Returns
+        ----------
+        A List of
+        String to return to gr.Textbox()
+        Files to return to gr.Files()
+        """
         try:
             self.update_model_if_needed(model_size=model_size, compute_type=compute_type, progress=progress)

@@ -91,16 +96,15 @@ class WhisperInference(BaseInterface):
             )
             progress(1, desc="Completed!")

-            file_name, file_ext = os.path.splitext(os.path.basename(fileobj.orig_name))
+            file_name, file_ext = os.path.splitext(os.path.basename(fileobj.name))
             file_name = safe_filename(file_name)
-            subtitle = self.generate_and_write_file(
+            subtitle, file_path = self.generate_and_write_file(
                 file_name=file_name,
                 transcribed_segments=result,
                 add_timestamp=add_timestamp,
                 file_format=file_format
             )
-
-            files_info[file_name] = {"subtitle": subtitle, "elapsed_time": elapsed_time}
+            files_info[file_name] = {"subtitle": subtitle, "elapsed_time": elapsed_time, "path": file_path}

             total_result = ''
             total_time = 0
@@ -110,10 +114,12 @@ class WhisperInference(BaseInterface):
                 total_result += f"{info['subtitle']}"
                 total_time += info["elapsed_time"]

-            return f"Done in {self.format_time(total_time)}! Subtitle is in the outputs folder.\n\n{total_result}"
+            gr_str = f"Done in {self.format_time(total_time)}! Subtitle is in the outputs folder.\n\n{total_result}"
+            gr_file_path = [info['path'] for info in files_info.values()]
+
+            return [gr_str, gr_file_path]
         except Exception as e:
             print(f"Error transcribing file: {str(e)}")
-            return f"Error transcribing file: {str(e)}"
         finally:
             self.release_cuda_memory()
             self.remove_input_files([fileobj.name for fileobj in fileobjs])
@@ -129,7 +135,7 @@ class WhisperInference(BaseInterface):
                            log_prob_threshold: float,
                            no_speech_threshold: float,
                            compute_type: str,
-                           progress=gr.Progress()):
+                           progress=gr.Progress()) -> list:
         """
         Write subtitle file from Youtube

@@ -162,6 +168,12 @@ class WhisperInference(BaseInterface):
         progress: gr.Progress
             Indicator to show progress directly in gradio.
             I use a forked version of whisper for this. To see more info : https://github.com/jhj0517/jhj0517-whisper/tree/add-progress-callback
+
+        Returns
+        ----------
+        A List of
+        String to return to gr.Textbox()
+        Files to return to gr.Files()
         """
         try:
             self.update_model_if_needed(model_size=model_size, compute_type=compute_type, progress=progress)
@@ -181,17 +193,17 @@ class WhisperInference(BaseInterface):
             progress(1, desc="Completed!")

             file_name = safe_filename(yt.title)
-            subtitle = self.generate_and_write_file(
+            subtitle, file_path = self.generate_and_write_file(
                 file_name=file_name,
                 transcribed_segments=result,
                 add_timestamp=add_timestamp,
                 file_format=file_format
             )

-            return f"Done in {self.format_time(elapsed_time)}! Subtitle file is in the outputs folder.\n\n{subtitle}"
+            gr_str = f"Done in {self.format_time(elapsed_time)}! Subtitle file is in the outputs folder.\n\n{subtitle}"
+            return [gr_str, file_path]
         except Exception as e:
             print(f"Error transcribing youtube video: {str(e)}")
-            return f"Error transcribing youtube video: {str(e)}"
         finally:
             try:
                 if 'yt' not in locals():
@@ -215,7 +227,7 @@ class WhisperInference(BaseInterface):
                        log_prob_threshold: float,
                        no_speech_threshold: float,
                        compute_type: str,
-                       progress=gr.Progress()):
+                       progress=gr.Progress()) -> list:
         """
         Write subtitle file from microphone

@@ -246,8 +258,13 @@ class WhisperInference(BaseInterface):
         progress: gr.Progress
             Indicator to show progress directly in gradio.
             I use a forked version of whisper for this. To see more info : https://github.com/jhj0517/jhj0517-whisper/tree/add-progress-callback
-        """

+        Returns
+        ----------
+        A List of
+        String to return to gr.Textbox()
+        Files to return to gr.Files()
+        """
         try:
             self.update_model_if_needed(model_size=model_size, compute_type=compute_type, progress=progress)

@@ -261,17 +278,17 @@ class WhisperInference(BaseInterface):
                 progress=progress)
             progress(1, desc="Completed!")

-            subtitle = self.generate_and_write_file(
+            subtitle, file_path = self.generate_and_write_file(
                 file_name="Mic",
                 transcribed_segments=result,
                 add_timestamp=True,
                 file_format=file_format
             )

-            return f"Done in {self.format_time(elapsed_time)}! Subtitle file is in the outputs folder.\n\n{subtitle}"
+            gr_str = f"Done in {self.format_time(elapsed_time)}! Subtitle file is in the outputs folder.\n\n{subtitle}"
+            return [gr_str, file_path]
         except Exception as e:
             print(f"Error transcribing mic: {str(e)}")
-            return f"Error transcribing mic: {str(e)}"
         finally:
             self.release_cuda_memory()
             self.remove_input_files([micaudio])
@@ -377,16 +394,19 @@ class WhisperInference(BaseInterface):

         if file_format == "SRT":
             content = get_srt(transcribed_segments)
-            write_file(content, f"{output_path}.srt")
+            output_path += '.srt'
+            write_file(content, output_path)

         elif file_format == "WebVTT":
             content = get_vtt(transcribed_segments)
-            write_file(content, f"{output_path}.vtt")
+            output_path += '.vtt'
+            write_file(content, output_path)

         elif file_format == "txt":
             content = get_txt(transcribed_segments)
-            write_file(content, f"{output_path}.vtt")
-        return content
+            output_path += '.txt'
+            write_file(content, output_path)
+        return content, output_path

     @staticmethod
     def format_time(elapsed_time: float) -> str:
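whisper_Inference.py mirrors the faster-whisper changes one-for-one. Worth noticing in the last hunk: the removed txt branch wrote f"{output_path}.vtt", so deriving the suffix once and reusing the same variable also fixes a wrong-extension bug. A tiny illustration of why the new shape is safer (paths are illustrative):

import os

output_path = os.path.join("outputs", "Mic-2024")
old_path = f"{output_path}.vtt"  # old txt branch: suffix typed at the call site, and mistyped
output_path += ".txt"            # new: the value that names the file is also the value returned
assert old_path.endswith(".vtt") and output_path.endswith(".txt")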
requirements.txt CHANGED
@@ -3,5 +3,5 @@ torch
 git+https://github.com/jhj0517/jhj0517-whisper.git
 faster-whisper
 transformers
-gradio==3.37.0
+gradio==4.14.0
 pytube
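The pin jumps a major release, which is what drives everything above: Gradio 4 removes type="file" on file components and, as the orig_name-to-name swap in the diff suggests, the orig_name attribute on uploads. A quick stdlib-only sanity check after installing:

import gradio as gr

# this revision assumes Gradio 4 semantics: gr.Files(type="filepath"),
# uploads exposing .name, and handlers returning one value per output component
major = int(gr.__version__.split(".")[0])
assert major >= 4, f"expected gradio>=4 for this revision, found {gr.__version__}"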