The following arguments have been added to app.py: vad_max_merge_size, language, save_downloaded_files, and autolaunch.
vad_max_merge_size:
The maximum size, in seconds, to which adjacent VAD speech segments are merged (the UI's "VAD - Max Merge Size (s)" option).
language:
The language spoken in the audio. Specify None to perform language detection.
save_downloaded_files:
If set, move downloaded files to the output directory instead of discarding them.
autolaunch:
Open the webui URL in the system's default browser upon launch.
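
For example, the new options can be combined on the command line (the values below are illustrative, not defaults):

python app.py --vad_max_merge_size 90 --language Japanese --save_downloaded_files --autolaunch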
webui.bat has been added.
This file is sourced from stable-diffusion-webui.
When downloading from YouTube, the download format has been changed from bestaudio to bestvideo[ext=mp4]+bestaudio[ext=m4a].
- .gitignore +6 -2
- app.py +21 -1
- dockerfile +2 -2
- requirements-fasterWhisper.txt +2 -2
- requirements-whisper.txt +2 -2
- requirements.txt +6 -6
- src/download.py +2 -2
- webui.bat +73 -0
.gitignore
CHANGED
@@ -1,6 +1,10 @@
 # Byte-compiled / optimized / DLL files
 __pycache__/
 .vscode/
 flagged/
 *.py[cod]
 *$py.class
+.vs/
+output/
+tmp/
+venv/
app.py
CHANGED
@@ -17,6 +17,7 @@ from src.config import VAD_INITIAL_PROMPT_MODE_VALUES, ApplicationConfig, VadIni
 from src.hooks.progressListener import ProgressListener
 from src.hooks.subTaskProgressListener import SubTaskProgressListener
 from src.hooks.whisperProgressHook import create_progress_listener_handle
+from src.languages import _TO_LANGUAGE_CODE
 from src.languages import get_language_names
 from src.modelCache import ModelCache
 from src.prompts.jsonPromptStrategy import JsonPromptStrategy
@@ -36,6 +37,8 @@ from src.vad import AbstractTranscription, NonSpeechStrategy, PeriodicTranscript
 from src.whisper.abstractWhisperContainer import AbstractWhisperContainer
 from src.whisper.whisperFactory import create_whisper_container
 
+import shutil
+
 # Configure more application defaults in config.json5
 
 # Gradio seems to truncate files without keeping the extension, so we need to truncate the file prefix ourself
@@ -249,6 +252,14 @@ class WhisperTranscriber:
         # Cleanup source
         if self.deleteUploadedFiles:
             for source in sources:
+                if self.app_config.save_downloaded_files and self.app_config.output_dir is not None and urlData:
+                    print("Saving downloaded file [" + os.path.basename(source.source_path) + "]")
+                    try:
+                        shutil.copy(source.source_path, self.app_config.output_dir)
+                    except Exception as e:
+                        # Ignore error - it's just a cleanup
+                        print("Error saving downloaded file " + source.source_path + ": " + str(e))
+
                 print("Deleting source file " + source.source_path)
 
                 try:
@@ -571,7 +582,7 @@ def create_ui(app_config: ApplicationConfig):
     else:
         print("Queue mode disabled - progress bars will not be shown.")
 
-    demo.launch(share=app_config.share, server_name=app_config.server_name, server_port=app_config.server_port)
+    demo.launch(inbrowser=app_config.autolaunch, share=app_config.share, server_name=app_config.server_name, server_port=app_config.server_port)
 
     # Clean up
     ui.close()
@@ -616,6 +627,15 @@ if __name__ == '__main__':
                         help="the compute type to use for inference")
     parser.add_argument("--threads", type=optional_int, default=0,
                         help="number of threads used by torch for CPU inference; supercedes MKL_NUM_THREADS/OMP_NUM_THREADS")
+    parser.add_argument("--vad_max_merge_size", type=int, default=default_app_config.vad_max_merge_size, \
+                        help="The number of VAD - Max Merge Size (s).") # 30
+    parser.add_argument("--language", type=str, default=None, choices=sorted(get_language_names()) + sorted([k.title() for k in _TO_LANGUAGE_CODE.keys()]),
+                        help="language spoken in the audio, specify None to perform language detection")
+    parser.add_argument("--save_downloaded_files", action='store_true', \
+                        help="True to move downloaded files to outputs.")
+    parser.add_argument("--autolaunch", action='store_true', \
+                        help="open the webui URL in the system's default browser upon launch")
+
 
     args = parser.parse_args().__dict__
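
Read on its own, the new save-before-delete behaviour in the cleanup loop amounts to the following minimal sketch. The standalone function name and plain arguments are illustrative (the real code lives on WhisperTranscriber and reads self.app_config), and the os.remove step is an assumption, since the diff is cut off inside the original try block:

import os
import shutil

def cleanup_source(source_path: str, output_dir: str = None,
                   save_downloaded_files: bool = False, is_url_source: bool = False):
    # Optionally keep a copy of a downloaded file before the source is deleted
    if save_downloaded_files and output_dir is not None and is_url_source:
        print("Saving downloaded file [" + os.path.basename(source_path) + "]")
        try:
            shutil.copy(source_path, output_dir)
        except Exception as e:
            # Ignore the error - saving the copy is best-effort
            print("Error saving downloaded file " + source_path + ": " + str(e))

    print("Deleting source file " + source_path)
    try:
        os.remove(source_path)  # assumed; the diff truncates the original try block
    except Exception as e:
        print("Error deleting source file " + source_path + ": " + str(e))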
dockerfile
CHANGED
@@ -3,7 +3,7 @@
 FROM huggingface/transformers-pytorch-gpu
 EXPOSE 7860
 
-ARG WHISPER_IMPLEMENTATION=whisper
+ARG WHISPER_IMPLEMENTATION=faster-whisper
 ENV WHISPER_IMPLEMENTATION=${WHISPER_IMPLEMENTATION}
 
 ADD . /opt/whisper-webui/
@@ -27,4 +27,4 @@ ENV PYTHONUNBUFFERED=1
 
 WORKDIR /opt/whisper-webui/
 ENTRYPOINT ["python3"]
-CMD ["app.py", "--input_audio_max_duration", "-1", "--server_name", "0.0.0.0", "--auto_parallel", "True"]
+CMD ["app.py", "--whisper_implementation", "faster-whisper", "--input_audio_max_duration", "-1", "--server_name", "0.0.0.0", "--auto_parallel", "True"]
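
Since the implementation is selected through a build argument, the previous default can still be restored at build time without editing the dockerfile; for example (the image tag is illustrative):

docker build --build-arg WHISPER_IMPLEMENTATION=whisper -t whisper-webui .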
requirements-fasterWhisper.txt
CHANGED
@@ -1,7 +1,7 @@
 ctranslate2
 faster-whisper
 ffmpeg-python==0.2.0
-gradio==3.
+gradio==3.27.0
 yt-dlp
 json5
 torch
requirements-whisper.txt
CHANGED
@@ -1,8 +1,8 @@
 git+https://github.com/huggingface/transformers
 git+https://github.com/openai/whisper.git
 transformers
 ffmpeg-python==0.2.0
-gradio==3.
+gradio==3.27.0
 yt-dlp
 torchaudio
 altair
requirements.txt
CHANGED
@@ -1,9 +1,9 @@
-git+https://github.com/huggingface/transformers
-git+https://github.com/openai/whisper.git
-transformers
+ctranslate2
+faster-whisper
 ffmpeg-python==0.2.0
-gradio==3.
+gradio==3.27.0
 yt-dlp
+json5
+torch
 torchaudio
-altair
-json5
+more_itertools
src/download.py
CHANGED
@@ -23,13 +23,13 @@ def download_url(url: str, maxDuration: int = None, destinationDirectory: str =
     return _perform_download(url, maxDuration=maxDuration, outputTemplate="%(title).10s %(id)s.%(ext)s")
     pass
 
-def _perform_download(url: str, maxDuration: int = None, outputTemplate: str = None, destinationDirectory: str = None, playlistItems: str = "1"):
+def _perform_download(url: str, maxDuration: int = None, outputTemplate: str = None, destinationDirectory: str = None, playlistItems: str = "1", onlyAudio: bool = False):
     # Create a temporary directory to store the downloaded files
     if destinationDirectory is None:
         destinationDirectory = mkdtemp()
 
     ydl_opts = {
-        "format": "bestaudio/best",
+        "format": "bestaudio/best" if onlyAudio else "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best",
         'paths': {
             'home': destinationDirectory
         }
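
For reference, the effect of the two format strings can be reproduced with yt-dlp directly. A minimal standalone sketch (the fetch wrapper and its defaults are illustrative and not part of src/download.py; merging separate video and audio streams also requires an ffmpeg binary on the PATH):

from tempfile import mkdtemp
import yt_dlp

def fetch(url: str, only_audio: bool = False) -> str:
    destination = mkdtemp()
    ydl_opts = {
        # only_audio keeps the old audio-only behaviour; the new default prefers
        # an mp4 video stream merged with an m4a audio stream, falling back to "best"
        "format": "bestaudio/best" if only_audio else "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best",
        "paths": {"home": destination},
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    return destination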
webui.bat
ADDED
@@ -0,0 +1,73 @@
+@echo off
+
+:: The source of the webui.bat file is stable-diffusion-webui
+set COMMANDLINE_ARGS=--whisper_implementation faster-whisper --input_audio_max_duration -1 --default_model_name large-v2 --auto_parallel True --output_dir output --vad_max_merge_size 90 --save_downloaded_files --autolaunch
+
+if not defined PYTHON (set PYTHON=python)
+if not defined VENV_DIR (set "VENV_DIR=%~dp0%venv")
+
+mkdir tmp 2>NUL
+
+%PYTHON% -c "" >tmp/stdout.txt 2>tmp/stderr.txt
+if %ERRORLEVEL% == 0 goto :check_pip
+echo Couldn't launch python
+goto :show_stdout_stderr
+
+:check_pip
+%PYTHON% -mpip --help >tmp/stdout.txt 2>tmp/stderr.txt
+if %ERRORLEVEL% == 0 goto :start_venv
+if "%PIP_INSTALLER_LOCATION%" == "" goto :show_stdout_stderr
+%PYTHON% "%PIP_INSTALLER_LOCATION%" >tmp/stdout.txt 2>tmp/stderr.txt
+if %ERRORLEVEL% == 0 goto :start_venv
+echo Couldn't install pip
+goto :show_stdout_stderr
+
+:start_venv
+if ["%VENV_DIR%"] == ["-"] goto :skip_venv
+if ["%SKIP_VENV%"] == ["1"] goto :skip_venv
+
+dir "%VENV_DIR%\Scripts\Python.exe" >tmp/stdout.txt 2>tmp/stderr.txt
+if %ERRORLEVEL% == 0 goto :activate_venv
+
+for /f "delims=" %%i in ('CALL %PYTHON% -c "import sys; print(sys.executable)"') do set PYTHON_FULLNAME="%%i"
+echo Creating venv in directory %VENV_DIR% using python %PYTHON_FULLNAME%
+%PYTHON_FULLNAME% -m venv "%VENV_DIR%" >tmp/stdout.txt 2>tmp/stderr.txt
+if %ERRORLEVEL% == 0 goto :activate_venv
+echo Unable to create venv in directory "%VENV_DIR%"
+goto :show_stdout_stderr
+
+:activate_venv
+set PYTHON="%VENV_DIR%\Scripts\Python.exe"
+echo venv %PYTHON%
+
+:skip_venv
+goto :launch
+
+:launch
+%PYTHON% app.py %COMMANDLINE_ARGS% %*
+pause
+exit /b
+
+:show_stdout_stderr
+
+echo.
+echo exit code: %errorlevel%
+
+for /f %%i in ("tmp\stdout.txt") do set size=%%~zi
+if %size% equ 0 goto :show_stderr
+echo.
+echo stdout:
+type tmp\stdout.txt
+
+:show_stderr
+for /f %%i in ("tmp\stderr.txt") do set size=%%~zi
+if %size% equ 0 goto :endofscript
+echo.
+echo stderr:
+type tmp\stderr.txt
+
+:endofscript
+
+echo.
+echo Launch unsuccessful. Exiting.
+pause
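
As in stable-diffusion-webui, PYTHON and VENV_DIR can be overridden from the environment before the script runs, and setting VENV_DIR to - skips the venv entirely; for example (the interpreter path is illustrative):

set PYTHON=C:\Python310\python.exe
set VENV_DIR=-
webui.bat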