Spaces:

jhj0517
/

Whisper-WebUI

Running

App Files Files Community

jhj0517 committed on May 15

Commit

f8c31ce

•

2 Parent(s): be569ea 3a1a0a3

Merge pull request #150 from jhj0517/feature/add-params

Browse files

Files changed (4) hide show

app.py +10 -4
modules/faster_whisper_inference.py +3 -2
modules/whisper_Inference.py +2 -1
modules/{whisper_data_class.py → whisper_parameter.py} +12 -6

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ from modules.nllb_inference import NLLBInference
 from ui.htmls import *
 from modules.youtube_manager import get_ytmetas
 from modules.deepl_api import DeepLAPI
-from modules.whisper_data_class import *
 class App:
@@ -67,6 +67,7 @@ class App:
                         nb_patience = gr.Number(label="Patience", value=1, interactive=True)
                         cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
                         tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
                     with gr.Row():
                         btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
                     with gr.Row():
@@ -85,7 +86,8 @@ class App:
                                                              best_of=nb_best_of,
                                                              patience=nb_patience,
                                                              condition_on_previous_text=cb_condition_on_previous_text,
-                                                             initial_prompt=tb_initial_prompt)
                     btn_run.click(fn=self.whisper_inf.transcribe_file,
                                   inputs=params + whisper_params.to_list(),
                                   outputs=[tb_indicator, files_subtitles])
@@ -121,6 +123,7 @@ class App:
                         nb_patience = gr.Number(label="Patience", value=1, interactive=True)
                         cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
                         tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
                     with gr.Row():
                         btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
                     with gr.Row():
@@ -139,7 +142,8 @@ class App:
                                                              best_of=nb_best_of,
                                                              patience=nb_patience,
                                                              condition_on_previous_text=cb_condition_on_previous_text,
-                                                             initial_prompt=tb_initial_prompt)
                     btn_run.click(fn=self.whisper_inf.transcribe_youtube,
                                   inputs=params + whisper_params.to_list(),
                                   outputs=[tb_indicator, files_subtitles])
@@ -168,6 +172,7 @@ class App:
                         nb_patience = gr.Number(label="Patience", value=1, interactive=True)
                         cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
                         tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
                     with gr.Row():
                         btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
                     with gr.Row():
@@ -186,7 +191,8 @@ class App:
                                                              best_of=nb_best_of,
                                                              patience=nb_patience,
                                                              condition_on_previous_text=cb_condition_on_previous_text,
-                                                             initial_prompt=tb_initial_prompt)
                     btn_run.click(fn=self.whisper_inf.transcribe_mic,
                                   inputs=params + whisper_params.to_list(),
                                   outputs=[tb_indicator, files_subtitles])

 from ui.htmls import *
 from modules.youtube_manager import get_ytmetas
 from modules.deepl_api import DeepLAPI
+from modules.whisper_parameter import *
 class App:
                         nb_patience = gr.Number(label="Patience", value=1, interactive=True)
                         cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
                         tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
+                        sd_temperature = gr.Slider(label="Temperature", value=0, step=0.01, maximum=1.0, interactive=True)
                     with gr.Row():
                         btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
                     with gr.Row():
                                                              best_of=nb_best_of,
                                                              patience=nb_patience,
                                                              condition_on_previous_text=cb_condition_on_previous_text,
+                                                             initial_prompt=tb_initial_prompt,
+                                                             temperature=sd_temperature)
                     btn_run.click(fn=self.whisper_inf.transcribe_file,
                                   inputs=params + whisper_params.to_list(),
                                   outputs=[tb_indicator, files_subtitles])
                         nb_patience = gr.Number(label="Patience", value=1, interactive=True)
                         cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
                         tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
+                        sd_temperature = gr.Slider(label="Temperature", value=0, step=0.01, maximum=1.0, interactive=True)
                     with gr.Row():
                         btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
                     with gr.Row():
                                                              best_of=nb_best_of,
                                                              patience=nb_patience,
                                                              condition_on_previous_text=cb_condition_on_previous_text,
+                                                             initial_prompt=tb_initial_prompt,
+                                                             temperature=sd_temperature)
                     btn_run.click(fn=self.whisper_inf.transcribe_youtube,
                                   inputs=params + whisper_params.to_list(),
                                   outputs=[tb_indicator, files_subtitles])
                         nb_patience = gr.Number(label="Patience", value=1, interactive=True)
                         cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
                         tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
+                        sd_temperature = gr.Slider(label="Temperature", value=0, step=0.01, maximum=1.0, interactive=True)
                     with gr.Row():
                         btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
                     with gr.Row():
                                                              best_of=nb_best_of,
                                                              patience=nb_patience,
                                                              condition_on_previous_text=cb_condition_on_previous_text,
+                                                             initial_prompt=tb_initial_prompt,
+                                                             temperature=sd_temperature)
                     btn_run.click(fn=self.whisper_inf.transcribe_mic,
                                   inputs=params + whisper_params.to_list(),
                                   outputs=[tb_indicator, files_subtitles])

modules/faster_whisper_inference.py CHANGED Viewed

@@ -14,7 +14,7 @@ import gradio as gr
 from .base_interface import BaseInterface
 from modules.subtitle_manager import get_srt, get_vtt, get_txt, write_file, safe_filename
 from modules.youtube_manager import get_ytdata, get_ytaudio
-from modules.whisper_data_class import *
 # Temporal fix of the issue : https://github.com/jhj0517/Whisper-WebUI/issues/144
 os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
@@ -268,7 +268,8 @@ class FasterWhisperInference(BaseInterface):
             log_prob_threshold=params.log_prob_threshold,
             no_speech_threshold=params.no_speech_threshold,
             best_of=params.best_of,
-            patience=params.patience
         )
         progress(0, desc="Loading audio..")

 from .base_interface import BaseInterface
 from modules.subtitle_manager import get_srt, get_vtt, get_txt, write_file, safe_filename
 from modules.youtube_manager import get_ytdata, get_ytaudio
+from modules.whisper_parameter import *
 # Temporal fix of the issue : https://github.com/jhj0517/Whisper-WebUI/issues/144
 os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
             log_prob_threshold=params.log_prob_threshold,
             no_speech_threshold=params.no_speech_threshold,
             best_of=params.best_of,
+            patience=params.patience,
+            temperature=params.temperature
         )
         progress(0, desc="Loading audio..")

modules/whisper_Inference.py CHANGED Viewed

@@ -10,7 +10,7 @@ import torch
 from .base_interface import BaseInterface
 from modules.subtitle_manager import get_srt, get_vtt, get_txt, write_file, safe_filename
 from modules.youtube_manager import get_ytdata, get_ytaudio
-from modules.whisper_data_class import *
 DEFAULT_MODEL_SIZE = "large-v3"
@@ -257,6 +257,7 @@ class WhisperInference(BaseInterface):
                                                 fp16=True if params.compute_type == "float16" else False,
                                                 best_of=params.best_of,
                                                 patience=params.patience,
                                                 progress_callback=progress_callback)["segments"]
         elapsed_time = time.time() - start_time

 from .base_interface import BaseInterface
 from modules.subtitle_manager import get_srt, get_vtt, get_txt, write_file, safe_filename
 from modules.youtube_manager import get_ytdata, get_ytaudio
+from modules.whisper_parameter import *
 DEFAULT_MODEL_SIZE = "large-v3"
                                                 fp16=True if params.compute_type == "float16" else False,
                                                 best_of=params.best_of,
                                                 patience=params.patience,
+                                                temperature=params.temperature,
                                                 progress_callback=progress_callback)["segments"]
         elapsed_time = time.time() - start_time

modules/{whisper_data_class.py → whisper_parameter.py} RENAMED Viewed

@@ -16,9 +16,10 @@ class WhisperGradioComponents:
     patience: gr.Number
     condition_on_previous_text: gr.Checkbox
     initial_prompt: gr.Textbox
     """
-    A data class to pass Gradio components to the function before Gradio pre-processing.
-    See this documentation for more information about Gradio pre-processing: https://www.gradio.app/docs/components
     Attributes
     ----------
@@ -62,12 +63,16 @@ class WhisperGradioComponents:
         Optional text to provide as a prompt for the first window. This can be used to provide, or
         "prompt-engineer" a context for transcription, e.g. custom vocabularies or proper nouns
         to make it more likely to predict those word correctly.
     """
     def to_list(self) -> list:
         """
-        Converts the data class attributes into a list, to pass parameters to a
-        button click event function before Gradio pre-processing.
         Returns
         ----------
@@ -89,7 +94,8 @@ class WhisperValues:
     patience: float
     condition_on_previous_text: bool
     initial_prompt: Optional[str]
     """
-    A data class to use Whisper parameters in your function after Gradio pre-processing.
-    See this documentation for more information about Gradio pre-processing: : https://www.gradio.app/docs/components
     """

     patience: gr.Number
     condition_on_previous_text: gr.Checkbox
     initial_prompt: gr.Textbox
+    temperature: gr.Slider
     """
+    A data class for Gradio components of the Whisper Parameters. Use "before" Gradio pre-processing.
+    See more about Gradio pre-processing: https://www.gradio.app/docs/components
     Attributes
     ----------
         Optional text to provide as a prompt for the first window. This can be used to provide, or
         "prompt-engineer" a context for transcription, e.g. custom vocabularies or proper nouns
         to make it more likely to predict those word correctly.
+    temperature: Temperature for sampling. It can be a tuple of temperatures,
+            which will be successively used upon failures according to either
+            `compression_ratio_threshold` or `log_prob_threshold`.
     """
     def to_list(self) -> list:
         """
+        Converts the data class attributes into a list. Use "before" Gradio pre-processing.
+        See more about Gradio pre-processing: https://www.gradio.app/docs/components
         Returns
         ----------
     patience: float
     condition_on_previous_text: bool
     initial_prompt: Optional[str]
+    temperature: float
     """
+    A data class to use Whisper parameters. Use "after" Gradio pre-processing.
+    See more about Gradio pre-processing: https://www.gradio.app/docs/components
     """