Jhjoon05 commited on
Commit
63ab978
1 Parent(s): a908df0

Initial commit

Browse files
Install.bat ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
@echo off
rem Create the project's virtual environment (if missing) and install the
rem packages from requirements.txt into it.

rem %~dp0 is the folder of this script (it already ends with a backslash).
rem Work from there so the venv and requirements.txt are found no matter
rem which directory the script was started from.
cd /d "%~dp0"

if not exist "%~dp0venv\Scripts" (
    echo Creating venv...
    python -m venv "%~dp0venv"
)

rem Stop here if the venv could not be created; otherwise pip below would
rem install into whatever Python happens to be first on PATH.
if not exist "%~dp0venv\Scripts\activate.bat" (
    echo.
    echo Requirements installation failed. please remove venv folder and run install.bat again.
    pause
    exit /b 1
)

echo checked the venv folder. now installing requirements..
call "%~dp0venv\Scripts\activate.bat"

pip install -r requirements.txt

if errorlevel 1 (
    echo.
    echo Requirements installation failed. please remove venv folder and run install.bat again.
) else (
    echo.
    echo Requirements installed successfully.
)
pause
Install.sh ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Create the project's virtual environment (if missing) and install the
# packages from requirements.txt into it.

# Run from the script's own folder so "venv" and "requirements.txt" resolve
# the same way regardless of the caller's working directory.
cd "$(dirname "$0")" || exit 1

if [ ! -d "venv" ]; then
    echo "Creating virtual environment..."
    # Many systems only ship "python3"; fall back to "python" otherwise.
    PYTHON_BIN="$(command -v python3 || command -v python)"
    if [ -z "$PYTHON_BIN" ] || ! "$PYTHON_BIN" -m venv venv; then
        echo ""
        echo "Requirements installation failed. please remove venv folder and run install.sh again."
        exit 1
    fi
fi

# Without a successful activation pip would install outside the venv.
if ! source venv/bin/activate; then
    echo ""
    echo "Requirements installation failed. please remove venv folder and run install.sh again."
    exit 1
fi

if ! pip install -r requirements.txt; then
    echo ""
    echo "Requirements installation failed. please remove venv folder and run install.sh again."
    deactivate
    exit 1
fi

echo ""
echo "Requirements installed successfully."

deactivate
Launch.bat ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
@echo off
rem Start app.py with the virtual environment's interpreter, forwarding any
rem command-line arguments given to this script.

rem %~dp0 is the folder of this script (it already ends with a backslash).
set PYTHON="%~dp0venv\Scripts\Python.exe"
echo venv %PYTHON%

if not exist %PYTHON% goto :launch_failed

rem app.py is addressed relative to the script folder, not the caller's cwd.
cd /d "%~dp0"
%PYTHON% app.py %*
if errorlevel 1 goto :launch_failed

pause
rem Leave explicitly so execution does not fall through into the failure
rem section below (falling through labels previously relaunched the app in
rem an endless loop).
exit /b 0

:launch_failed
echo.
echo Launch unsuccessful. Exiting.
pause
exit /b 1
Launch.sh ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Start app.py inside the project's virtual environment, forwarding any
# command-line arguments given to this script.

# Run from the script's own folder so "venv" and "app.py" resolve regardless
# of the caller's working directory.
cd "$(dirname "$0")" || exit 1

if ! source venv/bin/activate; then
    echo "Launch unsuccessful. Exiting."
    exit 1
fi

PYTHON="venv/bin/python"
echo "venv ${PYTHON}"
echo ""

# "$@" keeps each argument intact; an unquoted $* would re-split arguments
# that contain spaces.
"${PYTHON}" app.py "$@"
status=$?

deactivate
# Report the application's own exit status to the caller.
exit "${status}"
12
+
app.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from modules.model_Inference import ModelInference
3
+ import whisper
4
+ import os
5
+ from ui.htmls import CSS,MARKDOWN
6
+ from modules.youtube_manager import get_ytmetas
7
+
8
def open_output_folder():
    """Open the ``outputs`` folder in the operating system's file manager.

    Prints a message instead of raising when the folder is missing or when
    no file manager could be started. Returns None.
    """
    # Local imports: these modules are only needed by this handler.
    import subprocess
    import sys

    folder_path = "outputs"
    if not os.path.exists(folder_path):
        print(f"The folder {folder_path} does not exist.")
        return

    try:
        if sys.platform.startswith("win"):
            # Same effect as the shell's "start", without spawning a shell.
            os.startfile(folder_path)
        elif sys.platform == "darwin":
            subprocess.Popen(["open", folder_path])
        else:
            # Linux and other freedesktop-style systems.
            subprocess.Popen(["xdg-open", folder_path])
    except OSError as err:
        # e.g. xdg-open is not installed on a headless machine.
        print(f"Could not open the folder {folder_path}: {err}")
14
+
15
def _language_choices():
    """Return the language dropdown entries: auto-detect first, then Whisper's languages sorted by name."""
    return ["Automatic Detection"] + sorted(whisper.tokenizer.LANGUAGES.values())


# One inference object is shared by all three tabs.
inf = ModelInference()
block = gr.Blocks(css=CSS).queue(api_open=False)

with block:
    # Header
    with gr.Row():
        with gr.Column():
            gr.Markdown(MARKDOWN)

    with gr.Tabs():
        # The widget names below are re-bound in every tab; each click
        # handler is registered before the next tab re-binds them, so each
        # handler is wired to its own tab's widgets.
        with gr.TabItem("File"):  # tab1
            with gr.Row():
                input_file = gr.File(type="file", label="Upload File here")
            with gr.Row():
                dd_model = gr.Dropdown(
                    choices=whisper.available_models(),
                    value="large-v2",
                    label="Model",
                )
                dd_lang = gr.Dropdown(
                    choices=_language_choices(),
                    value="Automatic Detection",
                    label="Language",
                )
                dd_subformat = gr.Dropdown(
                    ["SRT", "WebVTT"],
                    value="SRT",
                    label="Subtitle Format",
                )
            with gr.Row():
                cb_translate = gr.Checkbox(
                    value=False,
                    label="Translate to English?",
                    interactive=True,
                )
            with gr.Row():
                btn_run = gr.Button("GENERATE SUBTITLE FILE")
            with gr.Row():
                tb_indicator = gr.Textbox(label="Output")
                btn_openfolder = gr.Button('📂').style(full_width=False)

            btn_run.click(
                fn=inf.transcribe_file,
                inputs=[input_file, dd_model, dd_lang, dd_subformat, cb_translate],
                outputs=[tb_indicator],
            )
            btn_openfolder.click(fn=open_output_folder, inputs=[], outputs=[])

        with gr.TabItem("Youtube"):  # tab2
            with gr.Row():
                tb_youtubelink = gr.Textbox(label="Youtube Link")
            with gr.Row().style(equal_height=True):
                with gr.Column():
                    img_thumbnail = gr.Image(label="Youtube Thumbnail")
                with gr.Column():
                    tb_title = gr.Label(label="Youtube Title")
                    tb_description = gr.Textbox(label="Youtube Description", max_lines=15)
            with gr.Row():
                dd_model = gr.Dropdown(
                    choices=whisper.available_models(),
                    value="large-v2",
                    label="Model",
                )
                dd_lang = gr.Dropdown(
                    choices=_language_choices(),
                    value="Automatic Detection",
                    label="Language",
                )
                dd_subformat = gr.Dropdown(
                    choices=["SRT", "WebVTT"],
                    value="SRT",
                    label="Subtitle Format",
                )
            with gr.Row():
                cb_translate = gr.Checkbox(
                    value=False,
                    label="Translate to English?",
                    interactive=True,
                )
            with gr.Row():
                btn_run = gr.Button("GENERATE SUBTITLE FILE")
            with gr.Row():
                tb_indicator = gr.Textbox(label="Output")
                btn_openfolder = gr.Button('📂').style(full_width=False)

            btn_run.click(
                fn=inf.transcribe_youtube,
                inputs=[tb_youtubelink, dd_model, dd_lang, dd_subformat, cb_translate],
                outputs=[tb_indicator],
            )
            # Refresh thumbnail, title and description whenever the link changes.
            tb_youtubelink.change(
                get_ytmetas,
                inputs=[tb_youtubelink],
                outputs=[img_thumbnail, tb_title, tb_description],
            )
            btn_openfolder.click(fn=open_output_folder, inputs=[], outputs=[])

        with gr.TabItem("Mic"):  # tab3
            with gr.Row():
                mic_input = gr.Microphone(
                    label="Record with Mic",
                    type="filepath",
                    interactive=True,
                )
            with gr.Row():
                dd_model = gr.Dropdown(
                    choices=whisper.available_models(),
                    value="large-v2",
                    label="Model",
                )
                dd_lang = gr.Dropdown(
                    choices=_language_choices(),
                    value="Automatic Detection",
                    label="Language",
                )
                dd_subformat = gr.Dropdown(
                    ["SRT", "WebVTT"],
                    value="SRT",
                    label="Subtitle Format",
                )
            with gr.Row():
                cb_translate = gr.Checkbox(
                    value=False,
                    label="Translate to English?",
                    interactive=True,
                )
            with gr.Row():
                btn_run = gr.Button("GENERATE SUBTITLE FILE")
            with gr.Row():
                tb_indicator = gr.Textbox(label="Output")
                btn_openfolder = gr.Button('📂').style(full_width=False)

            btn_run.click(
                fn=inf.transcribe_mic,
                inputs=[mic_input, dd_model, dd_lang, dd_subformat, cb_translate],
                outputs=[tb_indicator],
            )
            btn_openfolder.click(fn=open_output_folder, inputs=[], outputs=[])

block.launch()
85
+
modules/__init__.py ADDED
File without changes
modules/model_Inference.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import whisper
2
+ from modules.subtitle_manager import get_srt,get_vtt,write_srt,write_vtt,safe_filename
3
+ from modules.youtube_manager import get_ytdata,get_ytaudio
4
+ import gradio as gr
5
+ import os
6
+ from datetime import datetime
7
+
8
class ModelInference():
    """Transcribe audio with Whisper and save the result as a subtitle file.

    One Whisper model is kept loaded at a time. It is replaced only when a
    request names a different model than the one currently loaded.
    """

    def __init__(self):
        print("\nInitializing Model..\n")
        self.default_model = "large-v2"
        # Name of the model held in self.model, so later requests can tell
        # whether a reload is actually needed.
        self.current_model = self.default_model
        self.model = whisper.load_model(self.default_model)

    def _ensure_model(self, model, progress):
        """Load the model named *model* unless it is already the loaded one."""
        # None means "nothing selected": keep the loaded model rather than
        # handing None to whisper.load_model.
        if model is None or model == self.current_model:
            return
        progress(0, desc="Initializing Model..")
        self.model = whisper.load_model(model)
        self.current_model = model

    def _run_whisper(self, audio, lang, istranslate, progress):
        """Run the loaded model on *audio* and return Whisper's result.

        *lang* is a language name or the UI placeholder "Automatic Detection";
        *istranslate* selects the "translate" task instead of the default one.
        """
        def progress_callback(progress_value):
            progress(progress_value, desc="Transcribing..")

        if lang == "Automatic Detection":
            lang = None

        options = {"task": "translate"} if istranslate else {}
        return self.model.transcribe(
            audio=audio,
            language=lang,
            verbose=False,
            progress_callback=progress_callback,
            **options,
        )

    @staticmethod
    def _save_subtitle(segments, subformat, file_name):
        """Write *segments* to ``outputs/<file_name>-<timestamp>`` and return the subtitle text.

        Raises:
            ValueError: if *subformat* is neither "SRT" nor "WebVTT".
        """
        if subformat not in ("SRT", "WebVTT"):
            raise ValueError(f"Unsupported subtitle format: {subformat!r}")

        os.makedirs("outputs", exist_ok=True)
        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
        output_path = f"outputs/{file_name}-{timestamp}"

        if subformat == "SRT":
            subtitle = get_srt(segments)
            write_srt(subtitle, f"{output_path}.srt")
        else:
            subtitle = get_vtt(segments)
            write_vtt(subtitle, f"{output_path}.vtt")
        return subtitle

    def transcribe_file(self, fileobj,
                        model, lang, subformat, istranslate,
                        progress=gr.Progress()):
        """Transcribe an uploaded file and save the subtitle to the outputs folder.

        Args:
            fileobj: uploaded-file object; its ``name`` is loaded as audio and
                its ``orig_name`` is used to name the output.
            model: name of the Whisper model to use.
            lang: language name, or "Automatic Detection".
            subformat: "SRT" or "WebVTT".
            istranslate: translate to English instead of transcribing.
            progress: progress reporter called with a fraction and a description.

        Returns:
            A status message followed by the subtitle text.
        """
        self._ensure_model(model, progress)

        progress(0, desc="Loading Audio..")
        audio = whisper.load_audio(fileobj.name)

        result = self._run_whisper(audio, lang, istranslate, progress)
        progress(1, desc="Completed!")

        file_name, _ = os.path.splitext(os.path.basename(fileobj.orig_name))
        # NOTE(review): drops the last 9 characters of the base name —
        # presumably a suffix added to uploaded file names; confirm against
        # the gradio version in use.
        file_name = file_name[:-9]
        file_name = safe_filename(file_name)

        subtitle = self._save_subtitle(result["segments"], subformat, file_name)
        return f"Done! Subtitle is in the outputs folder.\n\n{subtitle}"

    def transcribe_youtube(self, youtubelink,
                           model, lang, subformat, istranslate,
                           progress=gr.Progress()):
        """Download a YouTube video's audio, transcribe it and save the subtitle.

        Args:
            youtubelink: URL of the video.
            model: name of the Whisper model to use.
            lang: language name, or "Automatic Detection".
            subformat: "SRT" or "WebVTT".
            istranslate: translate to English instead of transcribing.
            progress: progress reporter called with a fraction and a description.

        Returns:
            A status message followed by the subtitle text.
        """
        self._ensure_model(model, progress)

        progress(0, desc="Loading Audio from Youtube..")
        yt = get_ytdata(youtubelink)
        audio = whisper.load_audio(get_ytaudio(yt))

        result = self._run_whisper(audio, lang, istranslate, progress)
        progress(1, desc="Completed!")

        file_name = safe_filename(yt.title)
        subtitle = self._save_subtitle(result["segments"], subformat, file_name)
        return f"Done! Subtitle file is in the outputs folder.\n\n{subtitle}"

    def transcribe_mic(self, micaudio,
                       model, lang, subformat, istranslate,
                       progress=gr.Progress()):
        """Transcribe a microphone recording and save the subtitle.

        Args:
            micaudio: the recording, passed to the model unchanged.
            model: name of the Whisper model to use.
            lang: language name, or "Automatic Detection".
            subformat: "SRT" or "WebVTT".
            istranslate: translate to English instead of transcribing.
            progress: progress reporter called with a fraction and a description.

        Returns:
            A status message followed by the subtitle text.
        """
        self._ensure_model(model, progress)

        progress(0, desc="Loading Audio..")

        result = self._run_whisper(micaudio, lang, istranslate, progress)
        progress(1, desc="Completed!")

        subtitle = self._save_subtitle(result["segments"], subformat, "Mic")
        return f"Done! Subtitle file is in the outputs folder.\n\n{subtitle}"
125
+
126
+
modules/subtitle_manager.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
def timeformat_srt(time):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    Args:
        time: offset in seconds (int or float). Negative values are treated as 0.

    Returns:
        The timestamp string, rounded to the nearest millisecond.
    """
    # Convert to whole milliseconds once, with rounding. Truncating the
    # fractional part of a float instead turns e.g. 2.3 s into 299 ms.
    total_ms = max(0, int(round(time * 1000)))
    hours, remainder = divmod(total_ms, 3600000)
    minutes, remainder = divmod(remainder, 60000)
    seconds, milliseconds = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}"
9
+
10
def timeformat_vtt(time):
    """Format a time offset in seconds as a WebVTT timestamp ``HH:MM:SS.mmm``.

    Args:
        time: offset in seconds (int or float). Negative values are treated as 0.

    Returns:
        The timestamp string, rounded to the nearest millisecond.
    """
    # Convert to whole milliseconds once, with rounding. Truncating the
    # fractional part of a float instead turns e.g. 2.3 s into 299 ms.
    total_ms = max(0, int(round(time * 1000)))
    hours, remainder = divmod(total_ms, 3600000)
    minutes, remainder = divmod(remainder, 60000)
    seconds, milliseconds = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{milliseconds:03d}"
16
+
17
def write_srt(subtitle,output_file):
    """Write the SRT text *subtitle* to the path *output_file* as UTF-8, replacing any existing file."""
    with open(output_file, mode='w', encoding='utf-8') as srt_out:
        srt_out.write(subtitle)
20
+
21
def write_vtt(subtitle,output_file):
    """Write the WebVTT text *subtitle* to the path *output_file* as UTF-8, replacing any existing file."""
    with open(output_file, mode='w', encoding='utf-8') as vtt_out:
        vtt_out.write(subtitle)
24
+
25
def get_srt(segments):
    """Render *segments* as SRT text.

    Each segment is a mapping with 'start' and 'end' (seconds) and 'text'.
    Cues are numbered from 1 and each is followed by a blank line.
    """
    cues = []
    for number, seg in enumerate(segments, start=1):
        begin = timeformat_srt(seg['start'])
        finish = timeformat_srt(seg['end'])
        cues.append(f"{number}\n{begin} --> {finish}\n{seg['text']}\n\n")
    return "".join(cues)
32
+
33
def get_vtt(segments):
    """Render *segments* as WebVTT text.

    Each segment is a mapping with 'start' and 'end' (seconds) and 'text'.
    The output starts with the format's signature line, then one numbered
    cue per segment, each followed by a blank line.
    """
    # The signature must be exactly "WEBVTT" (upper case); parsers reject
    # files that start with any other spelling.
    parts = ["WEBVTT\n\n"]
    for number, seg in enumerate(segments, start=1):
        begin = timeformat_vtt(seg['start'])
        finish = timeformat_vtt(seg['end'])
        parts.append(f"{number}\n{begin} --> {finish}\n{seg['text']}\n\n")
    return "".join(parts)
40
+
41
def safe_filename(name):
    """Return *name* with every character that is invalid in a file name replaced by '_'.

    Replaced characters: < > : " / \\ | ? * and the control characters
    U+0000 through U+001F.
    """
    invalid_chars = re.compile(r'[<>:"/\\|?*\x00-\x1f]')
    return invalid_chars.sub('_', name)
modules/youtube_manager.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pytube import YouTube
2
+
3
def get_ytdata(link):
    """Build and return the pytube ``YouTube`` object for the video at *link*."""
    video = YouTube(link)
    return video
5
+
6
def get_ytmetas(link):
    """Return ``(thumbnail_url, title, description)`` for the video at *link*.

    An empty or whitespace-only link (e.g. the link textbox was cleared)
    yields ``(None, "", "")`` without contacting YouTube, so the preview
    widgets are simply reset instead of the lookup failing.
    """
    if not link or not link.strip():
        return None, "", ""
    yt = YouTube(link)
    return yt.thumbnail_url, yt.title, yt.description
9
+
10
def get_ytaudio(ytdata:YouTube):
    """Download the audio-only stream of *ytdata* and return what ``download`` returns.

    The file is always saved as "modules/yt_tmp.wav", so each call replaces
    the previous download.
    """
    # NOTE(review): the ".wav" name is fixed here regardless of the stream's
    # actual container — confirm the consumer does not rely on the extension.
    audio_stream = ytdata.streams.get_audio_only()
    return audio_stream.download(filename="modules/yt_tmp.wav")
outputs/outputs are saved here.txt ADDED
File without changes
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ --extra-index-url https://download.pytorch.org/whl/cu117
2
+ torch
3
+ git+https://github.com/jhj0517/jhj0517-whisper.git
4
+ gradio==3.19.1
5
+ pytube
ui/__init__.py ADDED
File without changes
ui/htmls.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CSS = """
2
+ .bmc-button {
3
+ padding: 2px 5px;
4
+ border-radius: 5px;
5
+ background-color: #FF813F;
6
+ color: white;
7
+ box-shadow: 0px 1px 2px rgba(0, 0, 0, 0.3);
8
+ text-decoration: none;
9
+ display: inline-block;
10
+ font-size: 20px;
11
+ margin: 2px;
12
+ cursor: pointer;
13
+ -webkit-transition: background-color 0.3s ease;
14
+ -ms-transition: background-color 0.3s ease;
15
+ transition: background-color 0.3s ease;
16
+ }
17
+ .bmc-button:hover,
18
+ .bmc-button:active,
19
+ .bmc-button:focus {
20
+ background-color: #FF5633;
21
+ }
22
+ .markdown {
23
+ margin-bottom: 0;
24
+ padding-bottom: 0;
25
+ }
26
+ .tabs {
27
+ margin-top: 0;
28
+ padding-top: 0;
29
+ }
30
+ """
31
+
32
+ MARKDOWN = """
33
+ ### Whisper Web-UI
34
+ Latest updates on <a href="https://github.com/jhj0517/Whsiper-WebUI">here.</a> If you found this project useful, please consider supporting it. <br>
35
+ <a class="bmc-button" href="https://www.buymeacoffee.com/jhj0517" target="_blank">
36
+ <img src="https://cdn.buymeacoffee.com/buttons/default-orange.png" alt="Buy Me A Coffee" height="30" width="140">
37
+ </a>
38
+ """