Spaces:

jhj0517
/

Whisper-WebUI

Running

App Files Files Community

Jhjoon05 committed on Mar 3, 2023

Commit

63ab978

•

1 Parent(s): a908df0

Initial commit

Browse files

Files changed (13) hide show

Install.bat +22 -0
Install.sh +21 -0
Launch.bat +18 -0
Launch.sh +12 -0
app.py +85 -0
modules/__init__.py +0 -0
modules/model_Inference.py +126 -0
modules/subtitle_manager.py +43 -0
modules/youtube_manager.py +11 -0
outputs/outputs are saved here.txt +0 -0
requirements.txt +5 -0
ui/__init__.py +0 -0
ui/htmls.py +38 -0

Install.bat ADDED Viewed

	@@ -0,0 +1,22 @@

+@echo off
+rem Create a virtual environment next to this script if it is missing, then
+rem install requirements.txt into it and report whether pip succeeded.
+rem The dp0 modifier expands to this script's folder and already ends with a backslash.
+if not exist "%~dp0venv\Scripts" (
+    echo Creating venv...
+    python -m venv "%~dp0venv"
+)
+echo checked the venv folder. now installing requirements..
+rem Activate by full path so the script works from any current directory.
+call "%~dp0venv\Scripts\activate.bat"
+cd /d "%~dp0"
+pip install -r requirements.txt
+if errorlevel 1 (
+    echo.
+    echo Requirements installation failed. please remove venv folder and run install.bat again.
+) else (
+    echo.
+    echo Requirements installed successfully.
+)
+pause

Install.sh ADDED Viewed

	@@ -0,0 +1,21 @@

+#!/bin/bash
+# Create a virtual environment in ./venv if it is missing, install
+# requirements.txt into it, and exit non-zero when installation fails.
+# Work from the script's own directory so the relative paths below resolve
+# no matter where the script is invoked from.
+cd "$(dirname "$0")" || exit 1
+if [ ! -d "venv" ]; then
+    echo "Creating virtual environment..."
+    python -m venv venv
+fi
+# Stop if activation fails; otherwise pip would install outside the venv.
+source venv/bin/activate || exit 1
+if ! pip install -r requirements.txt; then
+    echo ""
+    echo "Requirements installation failed. please remove venv folder and run install.sh again."
+    deactivate
+    exit 1
+fi
+echo ""
+echo "Requirements installed successfully."
+deactivate

Launch.bat ADDED Viewed

	@@ -0,0 +1,18 @@

+@echo off
+rem Launch app.py with the interpreter of the virtual environment created by
+rem Install.bat. Arguments given to this script are forwarded to app.py.
+rem The dp0 modifier expands to this script's folder and already ends with a backslash.
+set PYTHON="%~dp0venv\Scripts\Python.exe"
+rem Without the venv interpreter there is nothing to launch.
+if not exist %PYTHON% goto :endofscript
+echo venv %PYTHON%
+%PYTHON% app.py %*
+pause
+rem Stop here: falling through would run the failure message below, and the
+rem previous label layout fell through into a jump that relaunched the app.
+exit /b
+:endofscript
+echo.
+echo Launch unsuccessful. Exiting.
+pause

Launch.sh ADDED Viewed

	@@ -0,0 +1,12 @@

+#!/bin/bash
+# Start app.py with the virtual environment's interpreter, forwarding all
+# command-line arguments of this script to it.
+source venv/bin/activate
+PYTHON="venv/bin/python"
+echo "venv ${PYTHON}"
+echo ""
+# "$@" keeps each argument intact, including ones that contain spaces.
+"${PYTHON}" app.py "$@"
+deactivate

app.py ADDED Viewed

	@@ -0,0 +1,85 @@

+import gradio as gr
+from modules.model_Inference import ModelInference
+import whisper
+import os
+from ui.htmls import CSS,MARKDOWN
+from modules.youtube_manager import get_ytmetas
+def open_output_folder():
+    """Open the "outputs" folder in the operating system's file manager.
+
+    Prints a message instead of raising when the folder does not exist or
+    when the platform's launcher program cannot be started.
+    """
+    import subprocess
+    import sys
+    folder_path = "outputs"
+    if not os.path.exists(folder_path):
+        print(f"The folder {folder_path} does not exist.")
+        return
+    try:
+        if sys.platform.startswith("win"):
+            # Windows: hand the folder to the shell, like double-clicking it.
+            os.startfile(folder_path)
+        elif sys.platform == "darwin":
+            subprocess.run(["open", folder_path], check=False)
+        else:
+            # Linux and other desktops that provide xdg-open.
+            subprocess.run(["xdg-open", folder_path], check=False)
+    except OSError as err:
+        # Raised e.g. when the launcher program is not installed.
+        print(f"Could not open the folder {folder_path}: {err}")
+# Build the inference helper once at import time; its constructor loads the
+# default Whisper model, so this line can take a while.
+inf = ModelInference()
+# NOTE(review): queue() is presumably what lets the gr.Progress callbacks of the
+# transcribe_* handlers report progress — confirm against the Gradio docs.
+block = gr.Blocks(css=CSS).queue(api_open=False)
+# UI layout: a header row followed by three tabs (File, Youtube, Mic).
+# The names dd_model, dd_lang, dd_subformat, cb_translate, btn_run, tb_indicator
+# and btn_openfolder are rebound in every tab; each tab registers its click
+# handlers before the next tab rebinds them, so every handler uses its own widgets.
+with block:
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown(MARKDOWN)
+    with gr.Tabs():
+        with gr.TabItem("File"): # tab1
+            # Source: an uploaded file.
+            with gr.Row():
+                input_file = gr.File(type="file", label="Upload File here")
+            # Options: model, spoken language and subtitle format.
+            with gr.Row():
+                dd_model = gr.Dropdown(choices=whisper.available_models(),value="large-v2",label="Model")
+                dd_lang = gr.Dropdown(choices=["Automatic Detection"]+sorted(list(whisper.tokenizer.LANGUAGES.values())),value="Automatic Detection",label="Language")
+                dd_subformat = gr.Dropdown(["SRT","WebVTT"],value="SRT",label="Subtitle Format")
+            with gr.Row():
+                cb_translate = gr.Checkbox(value=False,label="Translate to English?",interactive=True)
+            with gr.Row():
+                btn_run = gr.Button("GENERATE SUBTITLE FILE")
+            with gr.Row():
+                tb_indicator = gr.Textbox(label="Output")
+                btn_openfolder = gr.Button('📂').style(full_width=False)
+            # The handler's return value is shown in the "Output" textbox.
+            btn_run.click(fn=inf.transcribe_file,inputs=[input_file,dd_model,dd_lang,dd_subformat,cb_translate],outputs=[tb_indicator])
+            btn_openfolder.click(fn=open_output_folder,inputs=[],outputs=[])
+        with gr.TabItem("Youtube"): # tab2
+            # Source: a video link; its thumbnail, title and description are previewed below.
+            with gr.Row():
+                tb_youtubelink = gr.Textbox(label="Youtube Link" )
+            with gr.Row().style(equal_height=True):
+                with gr.Column():
+                    img_thumbnail = gr.Image(label="Youtube Thumbnail")
+                with gr.Column():
+                    tb_title = gr.Label(label="Youtube Title")
+                    tb_description = gr.Textbox(label="Youtube Description",max_lines=15)
+            with gr.Row():
+                dd_model = gr.Dropdown(choices=whisper.available_models(),value="large-v2",label="Model")
+                dd_lang = gr.Dropdown(choices=["Automatic Detection"]+sorted(list(whisper.tokenizer.LANGUAGES.values())),value="Automatic Detection",label="Language")
+                dd_subformat = gr.Dropdown(choices=["SRT","WebVTT"],value="SRT",label="Subtitle Format")
+            with gr.Row():
+                cb_translate = gr.Checkbox(value=False,label="Translate to English?",interactive=True)
+            with gr.Row():
+                btn_run = gr.Button("GENERATE SUBTITLE FILE")
+            with gr.Row():
+                tb_indicator = gr.Textbox(label="Output")
+                btn_openfolder = gr.Button('📂').style(full_width=False)
+            btn_run.click(fn=inf.transcribe_youtube,inputs=[tb_youtubelink,dd_model,dd_lang,dd_subformat,cb_translate],outputs=[tb_indicator])
+            # Refresh the preview widgets whenever the link text changes.
+            tb_youtubelink.change(get_ytmetas,inputs=[tb_youtubelink],outputs=[img_thumbnail,tb_title,tb_description])
+            btn_openfolder.click(fn=open_output_folder,inputs=[],outputs=[])
+        with gr.TabItem("Mic"): # tab3
+            # Source: a microphone recording, handed to the handler as a file path.
+            with gr.Row():
+                mic_input = gr.Microphone(label="Record with Mic",type="filepath",interactive=True)
+            with gr.Row():
+                dd_model = gr.Dropdown(choices=whisper.available_models(),value="large-v2",label="Model")
+                dd_lang = gr.Dropdown(choices=["Automatic Detection"]+sorted(list(whisper.tokenizer.LANGUAGES.values())),value="Automatic Detection",label="Language")
+                dd_subformat = gr.Dropdown(["SRT","WebVTT"],value="SRT",label="Subtitle Format")
+            with gr.Row():
+                cb_translate = gr.Checkbox(value=False,label="Translate to English?",interactive=True)
+            with gr.Row():
+                btn_run = gr.Button("GENERATE SUBTITLE FILE")
+            with gr.Row():
+                tb_indicator = gr.Textbox(label="Output")
+                btn_openfolder = gr.Button('📂').style(full_width=False)
+            btn_run.click(fn=inf.transcribe_mic,inputs=[mic_input,dd_model,dd_lang,dd_subformat,cb_translate],outputs=[tb_indicator])
+            btn_openfolder.click(fn=open_output_folder,inputs=[],outputs=[])
+# Launch the app; this runs as soon as the module is executed.
+block.launch()

modules/__init__.py ADDED Viewed

File without changes

modules/model_Inference.py ADDED Viewed

	@@ -0,0 +1,126 @@

+import whisper
+from modules.subtitle_manager import get_srt,get_vtt,write_srt,write_vtt,safe_filename
+from modules.youtube_manager import get_ytdata,get_ytaudio
+import gradio as gr
+import os
+from datetime import datetime
+class ModelInference():
+    """Transcribe audio with Whisper and save the result as a subtitle file.
+
+    The instance keeps one loaded Whisper model and reloads it only when a
+    different model name is requested. The three public methods differ only
+    in where the audio comes from (uploaded file, YouTube link, microphone).
+    """
+    def __init__(self):
+        print("\nInitializing Model..\n")
+        self.default_model = "large-v2"
+        # Name of the model currently held in self.model. Tracked separately
+        # from default_model so that switching to another model and back to
+        # the default really reloads the default.
+        self.current_model_name = self.default_model
+        self.model = whisper.load_model(self.default_model)
+    def _ensure_model(self,model,progress):
+        """Make self.model the requested model, loading only when it differs.
+
+        A missing selection (None) falls back to the default model.
+        """
+        if model is None:
+            model = self.default_model
+        if model == self.current_model_name:
+            return
+        progress(0,desc="Initializing Model..")
+        self.model = whisper.load_model(model)
+        # Record the name only after a successful load.
+        self.current_model_name = model
+    def _run_model(self,audio,lang,istranslate,progress):
+        """Run the loaded model on audio and return the transcription result."""
+        def progress_callback(progress_value):
+            progress(progress_value,desc="Transcribing..")
+        if lang == "Automatic Detection":
+            # The UI placeholder is not a language name; pass None instead.
+            lang = None
+        # Pass task only when translating so the model's default task is used otherwise.
+        extra = {"task": "translate"} if istranslate else {}
+        result = self.model.transcribe(audio=audio,language=lang,verbose=False,progress_callback=progress_callback,**extra)
+        progress(1,desc="Completed!")
+        return result
+    @staticmethod
+    def _save_subtitle(segments,subformat,output_path):
+        """Render segments as "SRT" or "WebVTT", write the file and return its text.
+
+        output_path is the target path without extension.
+        Raises ValueError when subformat is neither "SRT" nor "WebVTT".
+        """
+        if subformat == "SRT":
+            subtitle = get_srt(segments)
+            writer, extension = write_srt, "srt"
+        elif subformat == "WebVTT":
+            subtitle = get_vtt(segments)
+            writer, extension = write_vtt, "vtt"
+        else:
+            raise ValueError(f"Unsupported subtitle format: {subformat!r}")
+        # Create the target folder if it was removed, instead of failing on open().
+        os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)
+        writer(subtitle,f"{output_path}.{extension}")
+        return subtitle
+    def transcribe_file(self,fileobj
+                        ,model,lang,subformat,istranslate,
+                        progress=gr.Progress()):
+        """Transcribe an uploaded file and save the subtitles under outputs/.
+
+        Returns a status message followed by the subtitle text.
+        """
+        self._ensure_model(model,progress)
+        progress(0,desc="Loading Audio..")
+        audio = whisper.load_audio(fileobj.name)
+        result = self._run_model(audio,lang,istranslate,progress)
+        file_name, _ = os.path.splitext(os.path.basename(fileobj.orig_name))
+        # NOTE(review): drops the last 9 characters of the name — presumably a
+        # suffix added by the upload handling; confirm against the Gradio version in use.
+        file_name = safe_filename(file_name[:-9])
+        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
+        subtitle = self._save_subtitle(result["segments"],subformat,f"outputs/{file_name}-{timestamp}")
+        return f"Done! Subtitle is in the outputs folder.\n\n{subtitle}"
+    def transcribe_youtube(self,youtubelink
+                        ,model,lang,subformat,istranslate,
+                        progress=gr.Progress()):
+        """Download the audio of a YouTube video, transcribe it and save the subtitles.
+
+        The output file is named after the video title. Returns a status
+        message followed by the subtitle text.
+        """
+        self._ensure_model(model,progress)
+        progress(0,desc="Loading Audio from Youtube..")
+        yt = get_ytdata(youtubelink)
+        audio = whisper.load_audio(get_ytaudio(yt))
+        result = self._run_model(audio,lang,istranslate,progress)
+        file_name = safe_filename(yt.title)
+        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
+        subtitle = self._save_subtitle(result["segments"],subformat,f"outputs/{file_name}-{timestamp}")
+        return f"Done! Subtitle file is in the outputs folder.\n\n{subtitle}"
+    def transcribe_mic(self,micaudio
+                    ,model,lang,subformat,istranslate,
+                    progress=gr.Progress()):
+        """Transcribe a microphone recording and save the subtitles as outputs/Mic-<timestamp>.
+
+        micaudio is passed to the model as-is. Returns a status message
+        followed by the subtitle text.
+        """
+        self._ensure_model(model,progress)
+        progress(0,desc="Loading Audio..")
+        result = self._run_model(micaudio,lang,istranslate,progress)
+        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
+        subtitle = self._save_subtitle(result["segments"],subformat,f"outputs/Mic-{timestamp}")
+        return f"Done! Subtitle file is in the outputs folder.\n\n{subtitle}"

modules/subtitle_manager.py ADDED Viewed

	@@ -0,0 +1,43 @@

+import re
+def timeformat_srt(time):
+    """Format a time offset in seconds as an SRT timestamp "HH:MM:SS,mmm"."""
+    # Convert to whole milliseconds once, with rounding: truncating the float
+    # remainder loses a millisecond (1.001 - 1 is slightly below 0.001).
+    total_ms = int(round(float(time) * 1000))
+    hours, rest = divmod(total_ms, 3600000)
+    minutes, rest = divmod(rest, 60000)
+    seconds, milliseconds = divmod(rest, 1000)
+    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}"
+def timeformat_vtt(time):
+    """Format a time offset in seconds as a WebVTT timestamp "HH:MM:SS.mmm"."""
+    # Convert to whole milliseconds once, with rounding: truncating the float
+    # remainder loses a millisecond (1.001 - 1 is slightly below 0.001).
+    total_ms = int(round(float(time) * 1000))
+    hours, rest = divmod(total_ms, 3600000)
+    minutes, rest = divmod(rest, 60000)
+    seconds, milliseconds = divmod(rest, 1000)
+    return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{milliseconds:03d}"
+def write_srt(subtitle,output_file):
+    """Write the SRT text subtitle to output_file as UTF-8, replacing any existing file."""
+    with open(output_file, mode='w', encoding='utf-8') as handle:
+        handle.write(subtitle)
+def write_vtt(subtitle,output_file):
+    """Write the WebVTT text subtitle to output_file as UTF-8, replacing any existing file."""
+    with open(output_file, mode='w', encoding='utf-8') as handle:
+        handle.write(subtitle)
+def get_srt(segments):
+    """Build the text of an SRT file from transcription segments.
+
+    Each segment is a mapping with 'start' and 'end' (seconds) and 'text'.
+    Cues are numbered from 1; an empty segment list yields an empty string.
+    """
+    cues = []
+    for number, segment in enumerate(segments, start=1):
+        # Trim surrounding whitespace, and neutralise a literal "-->" in the
+        # text so it cannot be mistaken for a cue timing line.
+        text = segment['text'].strip().replace('-->', '->')
+        cues.append(
+            f"{number}\n"
+            f"{timeformat_srt(segment['start'])} --> {timeformat_srt(segment['end'])}\n"
+            f"{text}\n\n"
+        )
+    return "".join(cues)
+def get_vtt(segments):
+    """Build the text of a WebVTT file from transcription segments.
+
+    Each segment is a mapping with 'start' and 'end' (seconds) and 'text'.
+    Cues are numbered from 1; an empty segment list yields only the header.
+    """
+    # The format requires the file to begin with the exact string "WEBVTT";
+    # parsers reject other spellings such as "WebVTT".
+    parts = ["WEBVTT\n\n"]
+    for number, segment in enumerate(segments, start=1):
+        # Trim surrounding whitespace, and neutralise a literal "-->" in the
+        # text, which is not allowed inside cue text.
+        text = segment['text'].strip().replace('-->', '->')
+        parts.append(
+            f"{number}\n"
+            f"{timeformat_vtt(segment['start'])} --> {timeformat_vtt(segment['end'])}\n"
+            f"{text}\n\n"
+        )
+    return "".join(parts)
+def safe_filename(name):
+    """Return name with each of < > : " / \\ | ? * and control characters 0x00-0x1f replaced by "_"."""
+    forbidden = re.compile(r'[<>:"/\\|?*\x00-\x1f]')
+    return forbidden.sub('_', name)

modules/youtube_manager.py ADDED Viewed

	@@ -0,0 +1,11 @@

+from pytube import YouTube
+def get_ytdata(link):
+    """Return a pytube YouTube object constructed from the given video link."""
+    return YouTube(link)
+def get_ytmetas(link):
+    """Return the (thumbnail URL, title, description) of the video behind link."""
+    video = YouTube(link)
+    thumbnail, title, description = video.thumbnail_url, video.title, video.description
+    return thumbnail, title, description
+def get_ytaudio(ytdata:YouTube):
+    """Download the audio-only stream of ytdata and return the result of download().
+
+    The target is always "modules/yt_tmp.wav", so every call writes to the same
+    file name. NOTE(review): the return value is presumably the saved file's
+    path (the caller hands it to whisper.load_audio) — confirm in the pytube docs.
+    NOTE(review): the stream is saved under a .wav name whatever its real
+    container is — verify that downstream loading does not rely on the extension.
+    """
+    return ytdata.streams.get_audio_only().download(filename="modules/yt_tmp.wav")

outputs/outputs are saved here.txt ADDED Viewed

File without changes

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+--extra-index-url https://download.pytorch.org/whl/cu117
+torch
+git+https://github.com/jhj0517/jhj0517-whisper.git
+gradio==3.19.1
+pytube

ui/__init__.py ADDED Viewed

File without changes

ui/htmls.py ADDED Viewed

	@@ -0,0 +1,38 @@

+# Custom stylesheet for the web UI: styles the "Buy Me A Coffee" link button
+# (.bmc-button) and removes the gap between the header markdown and the tabs.
+CSS = """
+.bmc-button {
+    padding: 2px 5px;
+    border-radius: 5px;
+    background-color: #FF813F;
+    color: white;
+    box-shadow: 0px 1px 2px rgba(0, 0, 0, 0.3);
+    text-decoration: none;
+    display: inline-block;
+    font-size: 20px;
+    margin: 2px;
+    cursor: pointer;
+    -webkit-transition: background-color 0.3s ease;
+    -ms-transition: background-color 0.3s ease;
+    transition: background-color 0.3s ease;
+}
+.bmc-button:hover,
+.bmc-button:active,
+.bmc-button:focus {
+    background-color: #FF5633;
+}
+.markdown {
+    margin-bottom: 0;
+    padding-bottom: 0;
+}
+.tabs {
+    margin-top: 0;
+    padding-top: 0;
+}
+"""
+# Header shown at the top of the web UI: project title, link to the project
+# page and a support button styled by the .bmc-button CSS class.
+MARKDOWN = """
+### Whisper Web-UI
+Latest updates on <a href="https://github.com/jhj0517/Whisper-WebUI">here.</a>  If you found this project useful, please consider supporting it. <br>
+<a class="bmc-button" href="https://www.buymeacoffee.com/jhj0517" target="_blank">
+<img src="https://cdn.buymeacoffee.com/buttons/default-orange.png" alt="Buy Me A Coffee" height="30" width="140">
+</a>
+"""